Diffstat (limited to 'contrib/llvm/lib/CodeGen')
233 files changed, 20206 insertions, 10619 deletions
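Much of the mechanical churn in this CodeGen import is the rename of the debug-print macro from DEBUG to LLVM_DEBUG (together with moves such as MachineValueType.h relocating under llvm/Support), as the first hunks below show. As orientation, a minimal sketch of the new spelling; the pass name in DEBUG_TYPE is made up for illustration and is not part of this patch:

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "example-pass" // hypothetical; real passes define their own

static void traceRegister(unsigned Reg) {
  // Old spelling, removed in this import:
  //   DEBUG(dbgs() << "reg " << Reg << '\n');
  // New spelling; compiles away unless assertions and -debug output are enabled:
  LLVM_DEBUG(dbgs() << "reg " << Reg << '\n');
}

The diff of the affected files follows.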
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index ffcb9a09ad73..632ea8e9cdc4 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -25,7 +25,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -35,6 +34,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/raw_ostream.h" #include <cassert> #include <map> @@ -139,10 +139,11 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker( CriticalPathSet |= CPSet; } - DEBUG(dbgs() << "AntiDep Critical-Path Registers:"); - DEBUG(for (unsigned r : CriticalPathSet.set_bits()) - dbgs() << " " << printReg(r, TRI)); - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "AntiDep Critical-Path Registers:"); + LLVM_DEBUG(for (unsigned r + : CriticalPathSet.set_bits()) dbgs() + << " " << printReg(r, TRI)); + LLVM_DEBUG(dbgs() << '\n'); } AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() { @@ -202,9 +203,9 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count, PrescanInstruction(MI, Count, PassthruRegs); ScanInstruction(MI, Count); - DEBUG(dbgs() << "Observe: "); - DEBUG(MI.dump()); - DEBUG(dbgs() << "\tRegs:"); + LLVM_DEBUG(dbgs() << "Observe: "); + LLVM_DEBUG(MI.dump()); + LLVM_DEBUG(dbgs() << "\tRegs:"); std::vector<unsigned> &DefIndices = State->GetDefIndices(); for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) { @@ -215,16 +216,16 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count, // conservative location (i.e. the beginning of the previous // schedule region). if (State->IsLive(Reg)) { - DEBUG(if (State->GetGroup(Reg) != 0) - dbgs() << " " << printReg(Reg, TRI) << "=g" << - State->GetGroup(Reg) << "->g0(region live-out)"); + LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs() + << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg) + << "->g0(region live-out)"); State->UnionGroups(Reg, 0); } else if ((DefIndices[Reg] < InsertPosIndex) && (DefIndices[Reg] >= Count)) { DefIndices[Reg] = Count; } } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); } bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr &MI, @@ -313,7 +314,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, // subregister definitions). for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) if (TRI->isSuperRegister(Reg, *AI) && State->IsLive(*AI)) { - DEBUG(if (!header && footer) dbgs() << footer); + LLVM_DEBUG(if (!header && footer) dbgs() << footer); return; } @@ -322,9 +323,11 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, DefIndices[Reg] = ~0u; RegRefs.erase(Reg); State->LeaveGroup(Reg); - DEBUG(if (header) { - dbgs() << header << printReg(Reg, TRI); header = nullptr; }); - DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag); + LLVM_DEBUG(if (header) { + dbgs() << header << printReg(Reg, TRI); + header = nullptr; + }); + LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag); // Repeat for subregisters. 
Note that we only do this if the superregister // was not live because otherwise, regardless whether we have an explicit // use of the subregister, the subregister's contents are needed for the @@ -336,15 +339,17 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, DefIndices[SubregReg] = ~0u; RegRefs.erase(SubregReg); State->LeaveGroup(SubregReg); - DEBUG(if (header) { - dbgs() << header << printReg(Reg, TRI); header = nullptr; }); - DEBUG(dbgs() << " " << printReg(SubregReg, TRI) << "->g" << - State->GetGroup(SubregReg) << tag); + LLVM_DEBUG(if (header) { + dbgs() << header << printReg(Reg, TRI); + header = nullptr; + }); + LLVM_DEBUG(dbgs() << " " << printReg(SubregReg, TRI) << "->g" + << State->GetGroup(SubregReg) << tag); } } } - DEBUG(if (!header && footer) dbgs() << footer); + LLVM_DEBUG(if (!header && footer) dbgs() << footer); } void AggressiveAntiDepBreaker::PrescanInstruction( @@ -367,14 +372,15 @@ void AggressiveAntiDepBreaker::PrescanInstruction( HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n"); } - DEBUG(dbgs() << "\tDef Groups:"); + LLVM_DEBUG(dbgs() << "\tDef Groups:"); for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; - DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg)); + LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" + << State->GetGroup(Reg)); // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers @@ -383,7 +389,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( // can tell user specified registers from compiler-specified. if (MI.isCall() || MI.hasExtraDefRegAllocReq() || TII->isPredicated(MI) || MI.isInlineAsm()) { - DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); + LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); State->UnionGroups(Reg, 0); } @@ -393,8 +399,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction( unsigned AliasReg = *AI; if (State->IsLive(AliasReg)) { State->UnionGroups(Reg, AliasReg); - DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " - << printReg(AliasReg, TRI) << ")"); + LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " + << printReg(AliasReg, TRI) << ")"); } } @@ -406,7 +412,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( RegRefs.insert(std::make_pair(Reg, RR)); } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); // Scan the register defs for this instruction and update // live-ranges. @@ -437,7 +443,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { - DEBUG(dbgs() << "\tUse Groups:"); + LLVM_DEBUG(dbgs() << "\tUse Groups:"); std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); @@ -448,11 +454,11 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, // FIXME: The issue with predicated instruction is more complex. We are being // conservatively here because the kill markers cannot be trusted after // if-conversion: - // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] + // %r6 = LDR %sp, %reg0, 92, 14, %reg0; mem:LD4[FixedStack14] // ... 
- // STR %r0, killed %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395] - // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12] - // STR %r0, killed %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) + // STR %r0, killed %r6, %reg0, 0, 0, %cpsr; mem:ST4[%395] + // %r6 = LDR %sp, %reg0, 100, 0, %cpsr; mem:LD4[FixedStack12] + // STR %r0, killed %r6, %reg0, 0, 14, %reg0; mem:ST4[%396](align=8) // // The first R6 kill is not really a kill since it's killed by a predicated // instruction which may not be executed. The second R6 def may or may not @@ -469,7 +475,8 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Reg = MO.getReg(); if (Reg == 0) continue; - DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg)); + LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" + << State->GetGroup(Reg)); // It wasn't previously live but now it is, this is a kill. Forget // the previous live-range information and start a new live-range @@ -477,7 +484,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, HandleLastUse(Reg, Count, "(last-use)"); if (Special) { - DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); + LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); State->UnionGroups(Reg, 0); } @@ -489,12 +496,12 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, RegRefs.insert(std::make_pair(Reg, RR)); } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); // Form a group of all defs and uses of a KILL instruction to ensure // that all registers are renamed as a group. if (MI.isKill()) { - DEBUG(dbgs() << "\tKill Group:"); + LLVM_DEBUG(dbgs() << "\tKill Group:"); unsigned FirstReg = 0; for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { @@ -504,15 +511,15 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, if (Reg == 0) continue; if (FirstReg != 0) { - DEBUG(dbgs() << "=" << printReg(Reg, TRI)); + LLVM_DEBUG(dbgs() << "=" << printReg(Reg, TRI)); State->UnionGroups(FirstReg, Reg); } else { - DEBUG(dbgs() << " " << printReg(Reg, TRI)); + LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI)); FirstReg = Reg; } } - DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n'); + LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n'); } } @@ -535,7 +542,7 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) { BV &= RCBV; } - DEBUG(dbgs() << " " << TRI->getRegClassName(RC)); + LLVM_DEBUG(dbgs() << " " << TRI->getRegClassName(RC)); } return BV; @@ -562,8 +569,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // Find the "superest" register in the group. At the same time, // collect the BitVector of registers that can be used to rename // each register. 
- DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex - << ":\n"); + LLVM_DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex + << ":\n"); std::map<unsigned, BitVector> RenameRegisterMap; unsigned SuperReg = 0; for (unsigned i = 0, e = Regs.size(); i != e; ++i) { @@ -573,13 +580,13 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // If Reg has any references, then collect possible rename regs if (RegRefs.count(Reg) > 0) { - DEBUG(dbgs() << "\t\t" << printReg(Reg, TRI) << ":"); + LLVM_DEBUG(dbgs() << "\t\t" << printReg(Reg, TRI) << ":"); BitVector &BV = RenameRegisterMap[Reg]; assert(BV.empty()); BV = GetRenameRegisters(Reg); - DEBUG({ + LLVM_DEBUG({ dbgs() << " ::"; for (unsigned r : BV.set_bits()) dbgs() << " " << printReg(r, TRI); @@ -625,11 +632,11 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(SuperRC); if (Order.empty()) { - DEBUG(dbgs() << "\tEmpty Super Regclass!!\n"); + LLVM_DEBUG(dbgs() << "\tEmpty Super Regclass!!\n"); return false; } - DEBUG(dbgs() << "\tFind Registers:"); + LLVM_DEBUG(dbgs() << "\tFind Registers:"); RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size())); @@ -645,7 +652,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // Don't replace a register with itself. if (NewSuperReg == SuperReg) continue; - DEBUG(dbgs() << " [" << printReg(NewSuperReg, TRI) << ':'); + LLVM_DEBUG(dbgs() << " [" << printReg(NewSuperReg, TRI) << ':'); RenameMap.clear(); // For each referenced group register (which must be a SuperReg or @@ -662,11 +669,11 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx); } - DEBUG(dbgs() << " " << printReg(NewReg, TRI)); + LLVM_DEBUG(dbgs() << " " << printReg(NewReg, TRI)); // Check if Reg can be renamed to NewReg. if (!RenameRegisterMap[Reg].test(NewReg)) { - DEBUG(dbgs() << "(no rename)"); + LLVM_DEBUG(dbgs() << "(no rename)"); goto next_super_reg; } @@ -675,7 +682,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // must also check all aliases of NewReg, because we can't define a // register when any sub or super is already live. if (State->IsLive(NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) { - DEBUG(dbgs() << "(live)"); + LLVM_DEBUG(dbgs() << "(live)"); goto next_super_reg; } else { bool found = false; @@ -683,7 +690,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( unsigned AliasReg = *AI; if (State->IsLive(AliasReg) || (KillIndices[Reg] > DefIndices[AliasReg])) { - DEBUG(dbgs() << "(alias " << printReg(AliasReg, TRI) << " live)"); + LLVM_DEBUG(dbgs() + << "(alias " << printReg(AliasReg, TRI) << " live)"); found = true; break; } @@ -701,7 +709,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( continue; if (UseMI->getOperand(Idx).isEarlyClobber()) { - DEBUG(dbgs() << "(ec)"); + LLVM_DEBUG(dbgs() << "(ec)"); goto next_super_reg; } } @@ -715,7 +723,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( MachineInstr *DefMI = Q.second.Operand->getParent(); if (DefMI->readsRegister(NewReg, TRI)) { - DEBUG(dbgs() << "(ec)"); + LLVM_DEBUG(dbgs() << "(ec)"); goto next_super_reg; } } @@ -728,14 +736,14 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // renamed, as recorded in RenameMap. 
RenameOrder.erase(SuperRC); RenameOrder.insert(RenameOrderType::value_type(SuperRC, R)); - DEBUG(dbgs() << "]\n"); + LLVM_DEBUG(dbgs() << "]\n"); return true; next_super_reg: - DEBUG(dbgs() << ']'); + LLVM_DEBUG(dbgs() << ']'); } while (R != EndR); - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); // No registers are free and available! return false; @@ -788,13 +796,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( } #ifndef NDEBUG - DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n"); - DEBUG(dbgs() << "Available regs:"); + LLVM_DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n"); + LLVM_DEBUG(dbgs() << "Available regs:"); for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { if (!State->IsLive(Reg)) - DEBUG(dbgs() << " " << printReg(Reg, TRI)); + LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI)); } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); #endif BitVector RegAliases(TRI->getNumRegs()); @@ -808,11 +816,11 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( I != E; --Count) { MachineInstr &MI = *--I; - if (MI.isDebugValue()) + if (MI.isDebugInstr()) continue; - DEBUG(dbgs() << "Anti: "); - DEBUG(MI.dump()); + LLVM_DEBUG(dbgs() << "Anti: "); + LLVM_DEBUG(MI.dump()); std::set<unsigned> PassthruRegs; GetPassthruRegs(MI, PassthruRegs); @@ -848,30 +856,30 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( (Edge->getKind() != SDep::Output)) continue; unsigned AntiDepReg = Edge->getReg(); - DEBUG(dbgs() << "\tAntidep reg: " << printReg(AntiDepReg, TRI)); + LLVM_DEBUG(dbgs() << "\tAntidep reg: " << printReg(AntiDepReg, TRI)); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); if (!MRI.isAllocatable(AntiDepReg)) { // Don't break anti-dependencies on non-allocatable registers. - DEBUG(dbgs() << " (non-allocatable)\n"); + LLVM_DEBUG(dbgs() << " (non-allocatable)\n"); continue; } else if (ExcludeRegs && ExcludeRegs->test(AntiDepReg)) { // Don't break anti-dependencies for critical path registers // if not on the critical path - DEBUG(dbgs() << " (not critical-path)\n"); + LLVM_DEBUG(dbgs() << " (not critical-path)\n"); continue; } else if (PassthruRegs.count(AntiDepReg) != 0) { // If the anti-dep register liveness "passes-thru", then // don't try to change it. It will be changed along with // the use if required to break an earlier antidep. - DEBUG(dbgs() << " (passthru)\n"); + LLVM_DEBUG(dbgs() << " (passthru)\n"); continue; } else { // No anti-dep breaking for implicit deps MachineOperand *AntiDepOp = MI.findRegisterDefOperand(AntiDepReg); assert(AntiDepOp && "Can't find index for defined register operand"); if (!AntiDepOp || AntiDepOp->isImplicit()) { - DEBUG(dbgs() << " (implicit)\n"); + LLVM_DEBUG(dbgs() << " (implicit)\n"); continue; } @@ -897,13 +905,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( PE = PathSU->Preds.end(); P != PE; ++P) { if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) && (P->getKind() != SDep::Output)) { - DEBUG(dbgs() << " (real dependency)\n"); + LLVM_DEBUG(dbgs() << " (real dependency)\n"); AntiDepReg = 0; break; } else if ((P->getSUnit() != NextSU) && (P->getKind() == SDep::Data) && (P->getReg() == AntiDepReg)) { - DEBUG(dbgs() << " (other dependency)\n"); + LLVM_DEBUG(dbgs() << " (other dependency)\n"); AntiDepReg = 0; break; } @@ -941,17 +949,17 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Determine AntiDepReg's register group. 
const unsigned GroupIndex = State->GetGroup(AntiDepReg); if (GroupIndex == 0) { - DEBUG(dbgs() << " (zero group)\n"); + LLVM_DEBUG(dbgs() << " (zero group)\n"); continue; } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); // Look for a suitable register to use to break the anti-dependence. std::map<unsigned, unsigned> RenameMap; if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) { - DEBUG(dbgs() << "\tBreaking anti-dependence edge on " - << printReg(AntiDepReg, TRI) << ":"); + LLVM_DEBUG(dbgs() << "\tBreaking anti-dependence edge on " + << printReg(AntiDepReg, TRI) << ":"); // Handle each group register... for (std::map<unsigned, unsigned>::iterator @@ -959,9 +967,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( unsigned CurrReg = S->first; unsigned NewReg = S->second; - DEBUG(dbgs() << " " << printReg(CurrReg, TRI) << "->" - << printReg(NewReg, TRI) << "(" - << RegRefs.count(CurrReg) << " refs)"); + LLVM_DEBUG(dbgs() << " " << printReg(CurrReg, TRI) << "->" + << printReg(NewReg, TRI) << "(" + << RegRefs.count(CurrReg) << " refs)"); // Update the references to the old register CurrReg to // refer to the new register NewReg. @@ -994,7 +1002,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( } ++Broken; - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); } } } diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp index 8e8c1d8e08d1..37dcb0be824e 100644 --- a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp +++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp @@ -39,7 +39,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, HardHints = true; rewind(); - DEBUG({ + LLVM_DEBUG({ if (!Hints.empty()) { dbgs() << "hints:"; for (unsigned I = 0, E = Hints.size(); I != E; ++I) diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index 0731ae575437..79f11def38f7 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -629,26 +629,26 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F, return true; } -static void collectFuncletMembers( - DenseMap<const MachineBasicBlock *, int> &FuncletMembership, int Funclet, +static void collectEHScopeMembers( + DenseMap<const MachineBasicBlock *, int> &EHScopeMembership, int EHScope, const MachineBasicBlock *MBB) { SmallVector<const MachineBasicBlock *, 16> Worklist = {MBB}; while (!Worklist.empty()) { const MachineBasicBlock *Visiting = Worklist.pop_back_val(); - // Don't follow blocks which start new funclets. + // Don't follow blocks which start new scopes. if (Visiting->isEHPad() && Visiting != MBB) continue; - // Add this MBB to our funclet. - auto P = FuncletMembership.insert(std::make_pair(Visiting, Funclet)); + // Add this MBB to our scope. + auto P = EHScopeMembership.insert(std::make_pair(Visiting, EHScope)); // Don't revisit blocks. if (!P.second) { - assert(P.first->second == Funclet && "MBB is part of two funclets!"); + assert(P.first->second == EHScope && "MBB is part of two scopes!"); continue; } - // Returns are boundaries where funclet transfer can occur, don't follow + // Returns are boundaries where scope transfer can occur, don't follow // successors. 
if (Visiting->isReturnBlock()) continue; @@ -659,25 +659,25 @@ static void collectFuncletMembers( } DenseMap<const MachineBasicBlock *, int> -llvm::getFuncletMembership(const MachineFunction &MF) { - DenseMap<const MachineBasicBlock *, int> FuncletMembership; +llvm::getEHScopeMembership(const MachineFunction &MF) { + DenseMap<const MachineBasicBlock *, int> EHScopeMembership; // We don't have anything to do if there aren't any EH pads. - if (!MF.hasEHFunclets()) - return FuncletMembership; + if (!MF.hasEHScopes()) + return EHScopeMembership; int EntryBBNumber = MF.front().getNumber(); bool IsSEH = isAsynchronousEHPersonality( classifyEHPersonality(MF.getFunction().getPersonalityFn())); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - SmallVector<const MachineBasicBlock *, 16> FuncletBlocks; + SmallVector<const MachineBasicBlock *, 16> EHScopeBlocks; SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks; SmallVector<const MachineBasicBlock *, 16> SEHCatchPads; SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors; for (const MachineBasicBlock &MBB : MF) { - if (MBB.isEHFuncletEntry()) { - FuncletBlocks.push_back(&MBB); + if (MBB.isEHScopeEntry()) { + EHScopeBlocks.push_back(&MBB); } else if (IsSEH && MBB.isEHPad()) { SEHCatchPads.push_back(&MBB); } else if (MBB.pred_empty()) { @@ -686,8 +686,8 @@ llvm::getFuncletMembership(const MachineFunction &MF) { MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator(); - // CatchPads are not funclets for SEH so do not consider CatchRet to - // transfer control to another funclet. + // CatchPads are not scopes for SEH so do not consider CatchRet to + // transfer control to another scope. if (MBBI == MBB.end() || MBBI->getOpcode() != TII->getCatchReturnOpcode()) continue; @@ -700,24 +700,24 @@ llvm::getFuncletMembership(const MachineFunction &MF) { } // We don't have anything to do if there aren't any EH pads. - if (FuncletBlocks.empty()) - return FuncletMembership; + if (EHScopeBlocks.empty()) + return EHScopeMembership; // Identify all the basic blocks reachable from the function entry. - collectFuncletMembers(FuncletMembership, EntryBBNumber, &MF.front()); - // All blocks not part of a funclet are in the parent function. + collectEHScopeMembers(EHScopeMembership, EntryBBNumber, &MF.front()); + // All blocks not part of a scope are in the parent function. for (const MachineBasicBlock *MBB : UnreachableBlocks) - collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB); - // Next, identify all the blocks inside the funclets. - for (const MachineBasicBlock *MBB : FuncletBlocks) - collectFuncletMembers(FuncletMembership, MBB->getNumber(), MBB); - // SEH CatchPads aren't really funclets, handle them separately. + collectEHScopeMembers(EHScopeMembership, EntryBBNumber, MBB); + // Next, identify all the blocks inside the scopes. + for (const MachineBasicBlock *MBB : EHScopeBlocks) + collectEHScopeMembers(EHScopeMembership, MBB->getNumber(), MBB); + // SEH CatchPads aren't really scopes, handle them separately. for (const MachineBasicBlock *MBB : SEHCatchPads) - collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB); + collectEHScopeMembers(EHScopeMembership, EntryBBNumber, MBB); // Finally, identify all the targets of a catchret. 
for (std::pair<const MachineBasicBlock *, int> CatchRetPair : CatchRetSuccessors) - collectFuncletMembers(FuncletMembership, CatchRetPair.second, + collectEHScopeMembers(EHScopeMembership, CatchRetPair.second, CatchRetPair.first); - return FuncletMembership; + return EHScopeMembership; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index 15cfbd5c40ff..9011f025f595 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -91,7 +91,8 @@ void ARMException::endFunction(const MachineFunction *MF) { ATS.emitFnEnd(); } -void ARMException::emitTypeInfos(unsigned TTypeEncoding) { +void ARMException::emitTypeInfos(unsigned TTypeEncoding, + MCSymbol *TTBaseLabel) { const MachineFunction *MF = Asm->MF; const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos(); const std::vector<unsigned> &FilterIds = MF->getFilterIds(); @@ -112,6 +113,8 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) { Asm->EmitTTypeReference(GV, TTypeEncoding); } + Asm->OutStreamer->EmitLabel(TTBaseLabel); + // Emit the Exception Specifications. if (VerboseAsm && !FilterIds.empty()) { Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp new file mode 100644 index 000000000000..20b0b8d3feab --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -0,0 +1,721 @@ +//===- llvm/CodeGen/AsmPrinter/AccelTable.cpp - Accelerator Tables --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing accelerator tables. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/AccelTable.h" +#include "DwarfCompileUnit.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <limits> +#include <vector> + +using namespace llvm; + +void AccelTableBase::computeBucketCount() { + // First get the number of unique hashes. + std::vector<uint32_t> Uniques; + Uniques.reserve(Entries.size()); + for (const auto &E : Entries) + Uniques.push_back(E.second.HashValue); + array_pod_sort(Uniques.begin(), Uniques.end()); + std::vector<uint32_t>::iterator P = + std::unique(Uniques.begin(), Uniques.end()); + + UniqueHashCount = std::distance(Uniques.begin(), P); + + if (UniqueHashCount > 1024) + BucketCount = UniqueHashCount / 4; + else if (UniqueHashCount > 16) + BucketCount = UniqueHashCount / 2; + else + BucketCount = std::max<uint32_t>(UniqueHashCount, 1); +} + +void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) { + // Create the individual hash data outputs. + for (auto &E : Entries) { + // Unique the entries. 
+ std::stable_sort(E.second.Values.begin(), E.second.Values.end(), + [](const AccelTableData *A, const AccelTableData *B) { + return *A < *B; + }); + E.second.Values.erase( + std::unique(E.second.Values.begin(), E.second.Values.end()), + E.second.Values.end()); + } + + // Figure out how many buckets we need, then compute the bucket contents and + // the final ordering. The hashes and offsets can be emitted by walking these + // data structures. We add temporary symbols to the data so they can be + // referenced when emitting the offsets. + computeBucketCount(); + + // Compute bucket contents and final ordering. + Buckets.resize(BucketCount); + for (auto &E : Entries) { + uint32_t Bucket = E.second.HashValue % BucketCount; + Buckets[Bucket].push_back(&E.second); + E.second.Sym = Asm->createTempSymbol(Prefix); + } + + // Sort the contents of the buckets by hash value so that hash collisions end + // up together. Stable sort makes testing easier and doesn't cost much more. + for (auto &Bucket : Buckets) + std::stable_sort(Bucket.begin(), Bucket.end(), + [](HashData *LHS, HashData *RHS) { + return LHS->HashValue < RHS->HashValue; + }); +} + +namespace { +/// Base class for writing out Accelerator tables. It holds the common +/// functionality for the two Accelerator table types. +class AccelTableWriter { +protected: + AsmPrinter *const Asm; ///< Destination. + const AccelTableBase &Contents; ///< Data to emit. + + /// Controls whether to emit duplicate hash and offset table entries for names + /// with identical hashes. Apple tables don't emit duplicate entries, DWARF v5 + /// tables do. + const bool SkipIdenticalHashes; + + void emitHashes() const; + + /// Emit offsets to lists of entries with identical names. The offsets are + /// relative to the Base argument. + void emitOffsets(const MCSymbol *Base) const; + +public: + AccelTableWriter(AsmPrinter *Asm, const AccelTableBase &Contents, + bool SkipIdenticalHashes) + : Asm(Asm), Contents(Contents), SkipIdenticalHashes(SkipIdenticalHashes) { + } +}; + +class AppleAccelTableWriter : public AccelTableWriter { + using Atom = AppleAccelTableData::Atom; + + /// The fixed header of an Apple Accelerator Table. + struct Header { + uint32_t Magic = MagicHash; + uint16_t Version = 1; + uint16_t HashFunction = dwarf::DW_hash_function_djb; + uint32_t BucketCount; + uint32_t HashCount; + uint32_t HeaderDataLength; + + /// 'HASH' magic value to detect endianness. + static const uint32_t MagicHash = 0x48415348; + + Header(uint32_t BucketCount, uint32_t UniqueHashCount, uint32_t DataLength) + : BucketCount(BucketCount), HashCount(UniqueHashCount), + HeaderDataLength(DataLength) {} + + void emit(AsmPrinter *Asm) const; +#ifndef NDEBUG + void print(raw_ostream &OS) const; + void dump() const { print(dbgs()); } +#endif + }; + + /// The HeaderData describes the structure of an Apple accelerator table + /// through a list of Atoms. + struct HeaderData { + /// In the case of data that is referenced via DW_FORM_ref_* the offset + /// base is used to describe the offset for all forms in the list of atoms. 
+ uint32_t DieOffsetBase; + + const SmallVector<Atom, 4> Atoms; + + HeaderData(ArrayRef<Atom> AtomList, uint32_t Offset = 0) + : DieOffsetBase(Offset), Atoms(AtomList.begin(), AtomList.end()) {} + + void emit(AsmPrinter *Asm) const; +#ifndef NDEBUG + void print(raw_ostream &OS) const; + void dump() const { print(dbgs()); } +#endif + }; + + Header Header; + HeaderData HeaderData; + const MCSymbol *SecBegin; + + void emitBuckets() const; + void emitData() const; + +public: + AppleAccelTableWriter(AsmPrinter *Asm, const AccelTableBase &Contents, + ArrayRef<Atom> Atoms, const MCSymbol *SecBegin) + : AccelTableWriter(Asm, Contents, true), + Header(Contents.getBucketCount(), Contents.getUniqueHashCount(), + 8 + (Atoms.size() * 4)), + HeaderData(Atoms), SecBegin(SecBegin) {} + + void emit() const; + +#ifndef NDEBUG + void print(raw_ostream &OS) const; + void dump() const { print(dbgs()); } +#endif +}; + +/// Class responsible for emitting a DWARF v5 Accelerator Table. The only +/// public function is emit(), which performs the actual emission. +/// +/// The class is templated in its data type. This allows us to emit both dyamic +/// and static data entries. A callback abstract the logic to provide a CU +/// index for a given entry, which is different per data type, but identical +/// for every entry in the same table. +template <typename DataT> +class Dwarf5AccelTableWriter : public AccelTableWriter { + struct Header { + uint32_t UnitLength = 0; + uint16_t Version = 5; + uint16_t Padding = 0; + uint32_t CompUnitCount; + uint32_t LocalTypeUnitCount = 0; + uint32_t ForeignTypeUnitCount = 0; + uint32_t BucketCount; + uint32_t NameCount; + uint32_t AbbrevTableSize = 0; + uint32_t AugmentationStringSize = sizeof(AugmentationString); + char AugmentationString[8] = {'L', 'L', 'V', 'M', '0', '7', '0', '0'}; + + Header(uint32_t CompUnitCount, uint32_t BucketCount, uint32_t NameCount) + : CompUnitCount(CompUnitCount), BucketCount(BucketCount), + NameCount(NameCount) {} + + void emit(const Dwarf5AccelTableWriter &Ctx) const; + }; + struct AttributeEncoding { + dwarf::Index Index; + dwarf::Form Form; + }; + + Header Header; + DenseMap<uint32_t, SmallVector<AttributeEncoding, 2>> Abbreviations; + ArrayRef<MCSymbol *> CompUnits; + llvm::function_ref<unsigned(const DataT &)> getCUIndexForEntry; + MCSymbol *ContributionStart = Asm->createTempSymbol("names_start"); + MCSymbol *ContributionEnd = Asm->createTempSymbol("names_end"); + MCSymbol *AbbrevStart = Asm->createTempSymbol("names_abbrev_start"); + MCSymbol *AbbrevEnd = Asm->createTempSymbol("names_abbrev_end"); + MCSymbol *EntryPool = Asm->createTempSymbol("names_entries"); + + DenseSet<uint32_t> getUniqueTags() const; + + // Right now, we emit uniform attributes for all tags. 
+ SmallVector<AttributeEncoding, 2> getUniformAttributes() const; + + void emitCUList() const; + void emitBuckets() const; + void emitStringOffsets() const; + void emitAbbrevs() const; + void emitEntry(const DataT &Entry) const; + void emitData() const; + +public: + Dwarf5AccelTableWriter( + AsmPrinter *Asm, const AccelTableBase &Contents, + ArrayRef<MCSymbol *> CompUnits, + llvm::function_ref<unsigned(const DataT &)> GetCUIndexForEntry); + + void emit() const; +}; +} // namespace + +void AccelTableWriter::emitHashes() const { + uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); + unsigned BucketIdx = 0; + for (auto &Bucket : Contents.getBuckets()) { + for (auto &Hash : Bucket) { + uint32_t HashValue = Hash->HashValue; + if (SkipIdenticalHashes && PrevHash == HashValue) + continue; + Asm->OutStreamer->AddComment("Hash in Bucket " + Twine(BucketIdx)); + Asm->emitInt32(HashValue); + PrevHash = HashValue; + } + BucketIdx++; + } +} + +void AccelTableWriter::emitOffsets(const MCSymbol *Base) const { + const auto &Buckets = Contents.getBuckets(); + uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + for (auto *Hash : Buckets[i]) { + uint32_t HashValue = Hash->HashValue; + if (SkipIdenticalHashes && PrevHash == HashValue) + continue; + PrevHash = HashValue; + Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i)); + Asm->EmitLabelDifference(Hash->Sym, Base, sizeof(uint32_t)); + } + } +} + +void AppleAccelTableWriter::Header::emit(AsmPrinter *Asm) const { + Asm->OutStreamer->AddComment("Header Magic"); + Asm->emitInt32(Magic); + Asm->OutStreamer->AddComment("Header Version"); + Asm->emitInt16(Version); + Asm->OutStreamer->AddComment("Header Hash Function"); + Asm->emitInt16(HashFunction); + Asm->OutStreamer->AddComment("Header Bucket Count"); + Asm->emitInt32(BucketCount); + Asm->OutStreamer->AddComment("Header Hash Count"); + Asm->emitInt32(HashCount); + Asm->OutStreamer->AddComment("Header Data Length"); + Asm->emitInt32(HeaderDataLength); +} + +void AppleAccelTableWriter::HeaderData::emit(AsmPrinter *Asm) const { + Asm->OutStreamer->AddComment("HeaderData Die Offset Base"); + Asm->emitInt32(DieOffsetBase); + Asm->OutStreamer->AddComment("HeaderData Atom Count"); + Asm->emitInt32(Atoms.size()); + + for (const Atom &A : Atoms) { + Asm->OutStreamer->AddComment(dwarf::AtomTypeString(A.Type)); + Asm->emitInt16(A.Type); + Asm->OutStreamer->AddComment(dwarf::FormEncodingString(A.Form)); + Asm->emitInt16(A.Form); + } +} + +void AppleAccelTableWriter::emitBuckets() const { + const auto &Buckets = Contents.getBuckets(); + unsigned index = 0; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + Asm->OutStreamer->AddComment("Bucket " + Twine(i)); + if (!Buckets[i].empty()) + Asm->emitInt32(index); + else + Asm->emitInt32(std::numeric_limits<uint32_t>::max()); + // Buckets point in the list of hashes, not to the data. Do not increment + // the index multiple times in case of hash collisions. + uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); + for (auto *HD : Buckets[i]) { + uint32_t HashValue = HD->HashValue; + if (PrevHash != HashValue) + ++index; + PrevHash = HashValue; + } + } +} + +void AppleAccelTableWriter::emitData() const { + const auto &Buckets = Contents.getBuckets(); + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); + for (auto &Hash : Buckets[i]) { + // Terminate the previous entry if there is no hash collision with the + // current one. 
+ if (PrevHash != std::numeric_limits<uint64_t>::max() && + PrevHash != Hash->HashValue) + Asm->emitInt32(0); + // Remember to emit the label for our offset. + Asm->OutStreamer->EmitLabel(Hash->Sym); + Asm->OutStreamer->AddComment(Hash->Name.getString()); + Asm->emitDwarfStringOffset(Hash->Name); + Asm->OutStreamer->AddComment("Num DIEs"); + Asm->emitInt32(Hash->Values.size()); + for (const auto *V : Hash->Values) + static_cast<const AppleAccelTableData *>(V)->emit(Asm); + PrevHash = Hash->HashValue; + } + // Emit the final end marker for the bucket. + if (!Buckets[i].empty()) + Asm->emitInt32(0); + } +} + +void AppleAccelTableWriter::emit() const { + Header.emit(Asm); + HeaderData.emit(Asm); + emitBuckets(); + emitHashes(); + emitOffsets(SecBegin); + emitData(); +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::Header::emit( + const Dwarf5AccelTableWriter &Ctx) const { + assert(CompUnitCount > 0 && "Index must have at least one CU."); + + AsmPrinter *Asm = Ctx.Asm; + Asm->OutStreamer->AddComment("Header: unit length"); + Asm->EmitLabelDifference(Ctx.ContributionEnd, Ctx.ContributionStart, + sizeof(uint32_t)); + Asm->OutStreamer->EmitLabel(Ctx.ContributionStart); + Asm->OutStreamer->AddComment("Header: version"); + Asm->emitInt16(Version); + Asm->OutStreamer->AddComment("Header: padding"); + Asm->emitInt16(Padding); + Asm->OutStreamer->AddComment("Header: compilation unit count"); + Asm->emitInt32(CompUnitCount); + Asm->OutStreamer->AddComment("Header: local type unit count"); + Asm->emitInt32(LocalTypeUnitCount); + Asm->OutStreamer->AddComment("Header: foreign type unit count"); + Asm->emitInt32(ForeignTypeUnitCount); + Asm->OutStreamer->AddComment("Header: bucket count"); + Asm->emitInt32(BucketCount); + Asm->OutStreamer->AddComment("Header: name count"); + Asm->emitInt32(NameCount); + Asm->OutStreamer->AddComment("Header: abbreviation table size"); + Asm->EmitLabelDifference(Ctx.AbbrevEnd, Ctx.AbbrevStart, sizeof(uint32_t)); + Asm->OutStreamer->AddComment("Header: augmentation string size"); + assert(AugmentationStringSize % 4 == 0); + Asm->emitInt32(AugmentationStringSize); + Asm->OutStreamer->AddComment("Header: augmentation string"); + Asm->OutStreamer->EmitBytes({AugmentationString, AugmentationStringSize}); +} + +template <typename DataT> +DenseSet<uint32_t> Dwarf5AccelTableWriter<DataT>::getUniqueTags() const { + DenseSet<uint32_t> UniqueTags; + for (auto &Bucket : Contents.getBuckets()) { + for (auto *Hash : Bucket) { + for (auto *Value : Hash->Values) { + unsigned Tag = static_cast<const DataT *>(Value)->getDieTag(); + UniqueTags.insert(Tag); + } + } + } + return UniqueTags; +} + +template <typename DataT> +SmallVector<typename Dwarf5AccelTableWriter<DataT>::AttributeEncoding, 2> +Dwarf5AccelTableWriter<DataT>::getUniformAttributes() const { + SmallVector<AttributeEncoding, 2> UA; + if (CompUnits.size() > 1) { + size_t LargestCUIndex = CompUnits.size() - 1; + dwarf::Form Form = DIEInteger::BestForm(/*IsSigned*/ false, LargestCUIndex); + UA.push_back({dwarf::DW_IDX_compile_unit, Form}); + } + UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4}); + return UA; +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::emitCUList() const { + for (const auto &CU : enumerate(CompUnits)) { + Asm->OutStreamer->AddComment("Compilation unit " + Twine(CU.index())); + Asm->emitDwarfSymbolReference(CU.value()); + } +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::emitBuckets() const { + uint32_t Index = 1; + for (const auto &Bucket : 
enumerate(Contents.getBuckets())) { + Asm->OutStreamer->AddComment("Bucket " + Twine(Bucket.index())); + Asm->emitInt32(Bucket.value().empty() ? 0 : Index); + Index += Bucket.value().size(); + } +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::emitStringOffsets() const { + for (const auto &Bucket : enumerate(Contents.getBuckets())) { + for (auto *Hash : Bucket.value()) { + DwarfStringPoolEntryRef String = Hash->Name; + Asm->OutStreamer->AddComment("String in Bucket " + Twine(Bucket.index()) + + ": " + String.getString()); + Asm->emitDwarfStringOffset(String); + } + } +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::emitAbbrevs() const { + Asm->OutStreamer->EmitLabel(AbbrevStart); + for (const auto &Abbrev : Abbreviations) { + Asm->OutStreamer->AddComment("Abbrev code"); + assert(Abbrev.first != 0); + Asm->EmitULEB128(Abbrev.first); + Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev.first)); + Asm->EmitULEB128(Abbrev.first); + for (const auto &AttrEnc : Abbrev.second) { + Asm->EmitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data()); + Asm->EmitULEB128(AttrEnc.Form, + dwarf::FormEncodingString(AttrEnc.Form).data()); + } + Asm->EmitULEB128(0, "End of abbrev"); + Asm->EmitULEB128(0, "End of abbrev"); + } + Asm->EmitULEB128(0, "End of abbrev list"); + Asm->OutStreamer->EmitLabel(AbbrevEnd); +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::emitEntry(const DataT &Entry) const { + auto AbbrevIt = Abbreviations.find(Entry.getDieTag()); + assert(AbbrevIt != Abbreviations.end() && + "Why wasn't this abbrev generated?"); + + Asm->EmitULEB128(AbbrevIt->first, "Abbreviation code"); + for (const auto &AttrEnc : AbbrevIt->second) { + Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index)); + switch (AttrEnc.Index) { + case dwarf::DW_IDX_compile_unit: { + DIEInteger ID(getCUIndexForEntry(Entry)); + ID.EmitValue(Asm, AttrEnc.Form); + break; + } + case dwarf::DW_IDX_die_offset: + assert(AttrEnc.Form == dwarf::DW_FORM_ref4); + Asm->emitInt32(Entry.getDieOffset()); + break; + default: + llvm_unreachable("Unexpected index attribute!"); + } + } +} + +template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emitData() const { + Asm->OutStreamer->EmitLabel(EntryPool); + for (auto &Bucket : Contents.getBuckets()) { + for (auto *Hash : Bucket) { + // Remember to emit the label for our offset. 
+ Asm->OutStreamer->EmitLabel(Hash->Sym); + for (const auto *Value : Hash->Values) + emitEntry(*static_cast<const DataT *>(Value)); + Asm->OutStreamer->AddComment("End of list: " + Hash->Name.getString()); + Asm->emitInt32(0); + } + } +} + +template <typename DataT> +Dwarf5AccelTableWriter<DataT>::Dwarf5AccelTableWriter( + AsmPrinter *Asm, const AccelTableBase &Contents, + ArrayRef<MCSymbol *> CompUnits, + llvm::function_ref<unsigned(const DataT &)> getCUIndexForEntry) + : AccelTableWriter(Asm, Contents, false), + Header(CompUnits.size(), Contents.getBucketCount(), + Contents.getUniqueNameCount()), + CompUnits(CompUnits), getCUIndexForEntry(std::move(getCUIndexForEntry)) { + DenseSet<uint32_t> UniqueTags = getUniqueTags(); + SmallVector<AttributeEncoding, 2> UniformAttributes = getUniformAttributes(); + + Abbreviations.reserve(UniqueTags.size()); + for (uint32_t Tag : UniqueTags) + Abbreviations.try_emplace(Tag, UniformAttributes); +} + +template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() const { + Header.emit(*this); + emitCUList(); + emitBuckets(); + emitHashes(); + emitStringOffsets(); + emitOffsets(EntryPool); + emitAbbrevs(); + emitData(); + Asm->OutStreamer->EmitValueToAlignment(4, 0); + Asm->OutStreamer->EmitLabel(ContributionEnd); +} + +void llvm::emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents, + StringRef Prefix, const MCSymbol *SecBegin, + ArrayRef<AppleAccelTableData::Atom> Atoms) { + Contents.finalize(Asm, Prefix); + AppleAccelTableWriter(Asm, Contents, Atoms, SecBegin).emit(); +} + +void llvm::emitDWARF5AccelTable( + AsmPrinter *Asm, AccelTable<DWARF5AccelTableData> &Contents, + const DwarfDebug &DD, ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs) { + std::vector<MCSymbol *> CompUnits; + for (const auto &CU : enumerate(CUs)) { + assert(CU.index() == CU.value()->getUniqueID()); + const DwarfCompileUnit *MainCU = + DD.useSplitDwarf() ? CU.value()->getSkeleton() : CU.value().get(); + CompUnits.push_back(MainCU->getLabelBegin()); + } + + Contents.finalize(Asm, "names"); + Dwarf5AccelTableWriter<DWARF5AccelTableData>( + Asm, Contents, CompUnits, + [&DD](const DWARF5AccelTableData &Entry) { + const DIE *CUDie = Entry.getDie().getUnitDie(); + return DD.lookupCU(CUDie)->getUniqueID(); + }) + .emit(); +} + +void llvm::emitDWARF5AccelTable( + AsmPrinter *Asm, AccelTable<DWARF5AccelTableStaticData> &Contents, + ArrayRef<MCSymbol *> CUs, + llvm::function_ref<unsigned(const DWARF5AccelTableStaticData &)> + getCUIndexForEntry) { + Contents.finalize(Asm, "names"); + Dwarf5AccelTableWriter<DWARF5AccelTableStaticData>(Asm, Contents, CUs, + getCUIndexForEntry) + .emit(); +} + +void AppleAccelTableOffsetData::emit(AsmPrinter *Asm) const { + Asm->emitInt32(Die.getDebugSectionOffset()); +} + +void AppleAccelTableTypeData::emit(AsmPrinter *Asm) const { + Asm->emitInt32(Die.getDebugSectionOffset()); + Asm->emitInt16(Die.getTag()); + Asm->emitInt8(0); +} + +void AppleAccelTableStaticOffsetData::emit(AsmPrinter *Asm) const { + Asm->emitInt32(Offset); +} + +void AppleAccelTableStaticTypeData::emit(AsmPrinter *Asm) const { + Asm->emitInt32(Offset); + Asm->emitInt16(Tag); + Asm->emitInt8(ObjCClassIsImplementation ? dwarf::DW_FLAG_type_implementation + : 0); + Asm->emitInt32(QualifiedNameHash); +} + +#ifndef _MSC_VER +// The lines below are rejected by older versions (TBD) of MSVC. 
+constexpr AppleAccelTableData::Atom AppleAccelTableTypeData::Atoms[]; +constexpr AppleAccelTableData::Atom AppleAccelTableOffsetData::Atoms[]; +constexpr AppleAccelTableData::Atom AppleAccelTableStaticOffsetData::Atoms[]; +constexpr AppleAccelTableData::Atom AppleAccelTableStaticTypeData::Atoms[]; +#else +// FIXME: Erase this path once the minimum MSCV version has been bumped. +const SmallVector<AppleAccelTableData::Atom, 4> + AppleAccelTableOffsetData::Atoms = { + Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)}; +const SmallVector<AppleAccelTableData::Atom, 4> AppleAccelTableTypeData::Atoms = + {Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), + Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), + Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; +const SmallVector<AppleAccelTableData::Atom, 4> + AppleAccelTableStaticOffsetData::Atoms = { + Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)}; +const SmallVector<AppleAccelTableData::Atom, 4> + AppleAccelTableStaticTypeData::Atoms = { + Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), + Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), + Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)}; +#endif + +#ifndef NDEBUG +void AppleAccelTableWriter::Header::print(raw_ostream &OS) const { + OS << "Magic: " << format("0x%x", Magic) << "\n" + << "Version: " << Version << "\n" + << "Hash Function: " << HashFunction << "\n" + << "Bucket Count: " << BucketCount << "\n" + << "Header Data Length: " << HeaderDataLength << "\n"; +} + +void AppleAccelTableData::Atom::print(raw_ostream &OS) const { + OS << "Type: " << dwarf::AtomTypeString(Type) << "\n" + << "Form: " << dwarf::FormEncodingString(Form) << "\n"; +} + +void AppleAccelTableWriter::HeaderData::print(raw_ostream &OS) const { + OS << "DIE Offset Base: " << DieOffsetBase << "\n"; + for (auto Atom : Atoms) + Atom.print(OS); +} + +void AppleAccelTableWriter::print(raw_ostream &OS) const { + Header.print(OS); + HeaderData.print(OS); + Contents.print(OS); + SecBegin->print(OS, nullptr); +} + +void AccelTableBase::HashData::print(raw_ostream &OS) const { + OS << "Name: " << Name.getString() << "\n"; + OS << " Hash Value: " << format("0x%x", HashValue) << "\n"; + OS << " Symbol: "; + if (Sym) + OS << *Sym; + else + OS << "<none>"; + OS << "\n"; + for (auto *Value : Values) + Value->print(OS); +} + +void AccelTableBase::print(raw_ostream &OS) const { + // Print Content. 
+ OS << "Entries: \n"; + for (const auto &Entry : Entries) { + OS << "Name: " << Entry.first() << "\n"; + for (auto *V : Entry.second.Values) + V->print(OS); + } + + OS << "Buckets and Hashes: \n"; + for (auto &Bucket : Buckets) + for (auto &Hash : Bucket) + Hash->print(OS); + + OS << "Data: \n"; + for (auto &E : Entries) + E.second.print(OS); +} + +void DWARF5AccelTableData::print(raw_ostream &OS) const { + OS << " Offset: " << getDieOffset() << "\n"; + OS << " Tag: " << dwarf::TagString(getDieTag()) << "\n"; +} + +void DWARF5AccelTableStaticData::print(raw_ostream &OS) const { + OS << " Offset: " << getDieOffset() << "\n"; + OS << " Tag: " << dwarf::TagString(getDieTag()) << "\n"; +} + +void AppleAccelTableOffsetData::print(raw_ostream &OS) const { + OS << " Offset: " << Die.getOffset() << "\n"; +} + +void AppleAccelTableTypeData::print(raw_ostream &OS) const { + OS << " Offset: " << Die.getOffset() << "\n"; + OS << " Tag: " << dwarf::TagString(Die.getTag()) << "\n"; +} + +void AppleAccelTableStaticOffsetData::print(raw_ostream &OS) const { + OS << " Static Offset: " << Offset << "\n"; +} + +void AppleAccelTableStaticTypeData::print(raw_ostream &OS) const { + OS << " Static Offset: " << Offset << "\n"; + OS << " QualifiedNameHash: " << format("%x\n", QualifiedNameHash) << "\n"; + OS << " Tag: " << dwarf::TagString(Tag) << "\n"; + OS << " ObjCClassIsImplementation: " + << (ObjCClassIsImplementation ? "true" : "false"); + OS << "\n"; +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp index 59ed0324bdb0..4a226527cb5b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -10,9 +10,9 @@ #include "AddressPool.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include <utility> using namespace llvm; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h index 990a158d87cd..5350006bf744 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h @@ -39,7 +39,7 @@ class AddressPool { public: AddressPool() = default; - /// \brief Returns the index into the address pool with the given + /// Returns the index into the address pool with the given /// label/symbol. 
unsigned getIndex(const MCSymbol *Sym, bool TLS = false); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d7995447592c..9bbc77b3056b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -16,6 +16,7 @@ #include "CodeViewDebug.h" #include "DwarfDebug.h" #include "DwarfException.h" +#include "WinCFGuard.h" #include "WinException.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" @@ -30,7 +31,6 @@ #include "llvm/ADT/Twine.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Analysis/ObjectUtils.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" @@ -39,6 +39,7 @@ #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -54,7 +55,6 @@ #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -87,6 +87,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" @@ -107,6 +108,7 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include <algorithm> @@ -130,6 +132,8 @@ static const char *const DbgTimerName = "emit"; static const char *const DbgTimerDescription = "Debug Info Emission"; static const char *const EHTimerName = "write_exception"; static const char *const EHTimerDescription = "DWARF Exception Writer"; +static const char *const CFGuardName = "Control Flow Guard"; +static const char *const CFGuardDescription = "Control Flow Guard Tables"; static const char *const CodeViewLineTablesGroupName = "linetables"; static const char *const CodeViewLineTablesGroupDescription = "CodeView Line Tables"; @@ -211,8 +215,10 @@ const DataLayout &AsmPrinter::getDataLayout() const { } // Do not use the cached DataLayout because some client use it without a Module -// (llvm-dsymutil, llvm-dwarfdump). -unsigned AsmPrinter::getPointerSize() const { return TM.getPointerSize(); } +// (dsymutil, llvm-dwarfdump). 
+unsigned AsmPrinter::getPointerSize() const { + return TM.getPointerSize(0); // FIXME: Default address space +} const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const { assert(MF && "getSubtargetInfo requires a valid MachineFunction!"); @@ -234,7 +240,6 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineModuleInfo>(); AU.addRequired<MachineOptimizationRemarkEmitterPass>(); AU.addRequired<GCModuleInfo>(); - AU.addRequired<MachineLoopInfo>(); } bool AsmPrinter::doInitialization(Module &M) { @@ -246,7 +251,7 @@ bool AsmPrinter::doInitialization(Module &M) { OutStreamer->InitSections(false); - // Emit the version-min deplyment target directive if needed. + // Emit the version-min deployment target directive if needed. // // FIXME: If we end up with a collection of these sorts of Darwin-specific // or ELF-specific things, it may make sense to have a platform helper class @@ -291,8 +296,7 @@ bool AsmPrinter::doInitialization(Module &M) { if (MAI->doesSupportDebugInformation()) { bool EmitCodeView = MMI->getModule()->getCodeViewFlag(); - if (EmitCodeView && (TM.getTargetTriple().isKnownWindowsMSVCEnvironment() || - TM.getTargetTriple().isWindowsItaniumEnvironment())) { + if (EmitCodeView && TM.getTargetTriple().isOSWindows()) { Handlers.push_back(HandlerInfo(new CodeViewDebug(this), DbgTimerName, DbgTimerDescription, CodeViewLineTablesGroupName, @@ -350,10 +354,20 @@ bool AsmPrinter::doInitialization(Module &M) { break; } break; + case ExceptionHandling::Wasm: + // TODO to prevent warning + break; } if (ES) Handlers.push_back(HandlerInfo(ES, EHTimerName, EHTimerDescription, DWARFGroupName, DWARFGroupDescription)); + + if (mdconst::extract_or_null<ConstantInt>( + MMI->getModule()->getModuleFlag("cfguard"))) + Handlers.push_back(HandlerInfo(new WinCFGuard(this), CFGuardName, + CFGuardDescription, DWARFGroupName, + DWARFGroupDescription)); + return false; } @@ -361,7 +375,7 @@ static bool canBeHidden(const GlobalValue *GV, const MCAsmInfo &MAI) { if (!MAI.hasWeakDefCanBeHiddenDirective()) return false; - return canBeOmittedFromSymbolTable(GV); + return GV->canBeOmittedFromSymbolTable(); } void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { @@ -416,7 +430,7 @@ MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const { /// EmitGlobalVariable - Emit the specified global variable to the .s file. void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { - bool IsEmuTLSVar = TM.Options.EmulatedTLS && GV->isThreadLocal(); + bool IsEmuTLSVar = TM.useEmulatedTLS() && GV->isThreadLocal(); assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) && "No emulated TLS variables in the common section"); @@ -898,6 +912,30 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { return true; } +/// This method handles the target-independent form of DBG_LABEL, returning +/// true if it was able to do so. A false return means the target will need +/// to handle MI in EmitInstruction. +static bool emitDebugLabelComment(const MachineInstr *MI, AsmPrinter &AP) { + if (MI->getNumOperands() != 1) + return false; + + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << "DEBUG_LABEL: "; + + const DILabel *V = MI->getDebugLabel(); + if (auto *SP = dyn_cast<DISubprogram>(V->getScope())) { + StringRef Name = SP->getName(); + if (!Name.empty()) + OS << Name << ":"; + } + OS << V->getName(); + + // NOTE: Want this comment at start of line, don't emit with AddComment. 
+ AP.OutStreamer->emitRawComment(OS.str()); + return true; +} + AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() const { if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI && MF->getFunction().needsUnwindTableEntry()) @@ -952,7 +990,8 @@ void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) { if (!MF.getTarget().Options.EmitStackSizeSection) return; - MCSection *StackSizeSection = getObjFileLowering().getStackSizesSection(); + MCSection *StackSizeSection = + getObjFileLowering().getStackSizesSection(*getCurrentSection()); if (!StackSizeSection) return; @@ -964,10 +1003,9 @@ void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) { OutStreamer->PushSection(); OutStreamer->SwitchSection(StackSizeSection); - const MCSymbol *FunctionSymbol = getSymbol(&MF.getFunction()); + const MCSymbol *FunctionSymbol = getFunctionBegin(); uint64_t StackSize = FrameInfo.getStackSize(); - OutStreamer->EmitValue(MCSymbolRefExpr::create(FunctionSymbol, OutContext), - /* size = */ 8); + OutStreamer->EmitSymbolValue(FunctionSymbol, TM.getProgramPointerSize()); OutStreamer->EmitULEB128IntValue(StackSize); OutStreamer->PopSection(); @@ -996,6 +1034,24 @@ void AsmPrinter::EmitFunctionBody() { bool ShouldPrintDebugScopes = MMI->hasDebugInfo(); + if (isVerbose()) { + // Get MachineDominatorTree or compute it on the fly if it's unavailable + MDT = getAnalysisIfAvailable<MachineDominatorTree>(); + if (!MDT) { + OwnedMDT = make_unique<MachineDominatorTree>(); + OwnedMDT->getBase().recalculate(*MF); + MDT = OwnedMDT.get(); + } + + // Get MachineLoopInfo or compute it on the fly if it's unavailable + MLI = getAnalysisIfAvailable<MachineLoopInfo>(); + if (!MLI) { + OwnedMLI = make_unique<MachineLoopInfo>(); + OwnedMLI->getBase().analyze(MDT->getBase()); + MLI = OwnedMLI.get(); + } + } + // Print out code for the function. bool HasAnyRealCode = false; int NumInstsInFunction = 0; @@ -1005,7 +1061,7 @@ void AsmPrinter::EmitFunctionBody() { for (auto &MI : MBB) { // Print the assembly for the instruction. if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && - !MI.isDebugValue()) { + !MI.isDebugInstr()) { HasAnyRealCode = true; ++NumInstsInFunction; } @@ -1044,6 +1100,12 @@ void AsmPrinter::EmitFunctionBody() { EmitInstruction(&MI); } break; + case TargetOpcode::DBG_LABEL: + if (isVerbose()) { + if (!emitDebugLabelComment(&MI, *this)) + EmitInstruction(&MI); + } + break; case TargetOpcode::IMPLICIT_DEF: if (isVerbose()) emitImplicitDef(&MI); break; @@ -1155,7 +1217,7 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer->AddBlankLine(); } -/// \brief Compute the number of Global Variables that uses a Constant. +/// Compute the number of Global Variables that uses a Constant. static unsigned getNumGlobalVariableUses(const Constant *C) { if (!C) return 0; @@ -1170,7 +1232,7 @@ static unsigned getNumGlobalVariableUses(const Constant *C) { return NumUses; } -/// \brief Only consider global GOT equivalents if at least one user is a +/// Only consider global GOT equivalents if at least one user is a /// cstexpr inside an initializer of another global variables. Also, don't /// handle cstexpr inside instructions. 
During global variable emission, /// candidates are skipped and are emitted later in case at least one cstexpr @@ -1193,7 +1255,7 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV, return NumGOTEquivUsers > 0; } -/// \brief Unnamed constant global variables solely contaning a pointer to +/// Unnamed constant global variables solely contaning a pointer to /// another globals variable is equivalent to a GOT table entry; it contains the /// the address of another symbol. Optimize it and replace accesses to these /// "GOT equivalents" by using the GOT entry for the final global instead. @@ -1214,7 +1276,7 @@ void AsmPrinter::computeGlobalGOTEquivs(Module &M) { } } -/// \brief Constant expressions using GOT equivalent globals may not be eligible +/// Constant expressions using GOT equivalent globals may not be eligible /// for PC relative GOT entry conversion, in such cases we need to emit such /// globals we previously omitted in EmitGlobalVariable. void AsmPrinter::emitGlobalGOTEquivs() { @@ -1312,7 +1374,7 @@ bool AsmPrinter::doFinalization(Module &M) { const TargetLoweringObjectFile &TLOF = getObjFileLowering(); - TLOF.emitModuleMetadata(*OutStreamer, M, TM); + TLOF.emitModuleMetadata(*OutStreamer, M); if (TM.getTargetTriple().isOSBinFormatELF()) { MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); @@ -1323,6 +1385,7 @@ bool AsmPrinter::doFinalization(Module &M) { OutStreamer->SwitchSection(TLOF.getDataSection()); const DataLayout &DL = M.getDataLayout(); + EmitAlignment(Log2_32(DL.getPointerSize())); for (const auto &Stub : Stubs) { OutStreamer->EmitLabel(Stub.first); OutStreamer->EmitSymbolValue(Stub.second.getPointer(), @@ -1421,6 +1484,61 @@ bool AsmPrinter::doFinalization(Module &M) { if (MCSection *S = MAI->getNonexecutableStackSection(OutContext)) OutStreamer->SwitchSection(S); + if (TM.getTargetTriple().isOSBinFormatCOFF()) { + // Emit /EXPORT: flags for each exported global as necessary. + const auto &TLOF = getObjFileLowering(); + std::string Flags; + + for (const GlobalValue &GV : M.global_values()) { + raw_string_ostream OS(Flags); + TLOF.emitLinkerFlagsForGlobal(OS, &GV); + OS.flush(); + if (!Flags.empty()) { + OutStreamer->SwitchSection(TLOF.getDrectveSection()); + OutStreamer->EmitBytes(Flags); + } + Flags.clear(); + } + + // Emit /INCLUDE: flags for each used global as necessary. + if (const auto *LU = M.getNamedGlobal("llvm.used")) { + assert(LU->hasInitializer() && + "expected llvm.used to have an initializer"); + assert(isa<ArrayType>(LU->getValueType()) && + "expected llvm.used to be an array type"); + if (const auto *A = cast<ConstantArray>(LU->getInitializer())) { + for (const Value *Op : A->operands()) { + const auto *GV = + cast<GlobalValue>(Op->stripPointerCastsNoFollowAliases()); + // Global symbols with internal or private linkage are not visible to + // the linker, and thus would cause an error when the linker tried to + // preserve the symbol due to the `/include:` directive. + if (GV->hasLocalLinkage()) + continue; + + raw_string_ostream OS(Flags); + TLOF.emitLinkerFlagsForUsed(OS, GV); + OS.flush(); + + if (!Flags.empty()) { + OutStreamer->SwitchSection(TLOF.getDrectveSection()); + OutStreamer->EmitBytes(Flags); + } + Flags.clear(); + } + } + } + } + + if (TM.Options.EmitAddrsig) { + // Emit address-significance attributes for all globals. 
+ OutStreamer->EmitAddrsig(); + for (const GlobalValue &GV : M.global_values()) + if (!GV.isThreadLocal() && !GV.getName().startswith("llvm.") && + !GV.hasAtLeastLocalUnnamedAddr()) + OutStreamer->EmitAddrsigSym(getSymbol(&GV)); + } + // Allow the target to emit any magic that it wants at the end of the file, // after everything else has gone out. EmitEndOfAsmFile(M); @@ -1429,6 +1547,8 @@ bool AsmPrinter::doFinalization(Module &M) { OutStreamer->Finish(); OutStreamer->reset(); + OwnedMLI.reset(); + OwnedMDT.reset(); return false; } @@ -1447,14 +1567,14 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { CurrentFnBegin = nullptr; CurExceptionSym = nullptr; bool NeedsLocalForSize = MAI->needsLocalForSize(); - if (needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize) { + if (needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize || + MF.getTarget().Options.EmitStackSizeSection) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) CurrentFnSymForSize = CurrentFnBegin; } ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); - LI = &getAnalysis<MachineLoopInfo>(); const TargetSubtargetInfo &STI = MF.getSubtarget(); EnablePrintSchedInfo = PrintSchedule.getNumOccurrences() @@ -1842,22 +1962,27 @@ void AsmPrinter::EmitModuleIdents(Module &M) { // Emission and print routines // -/// EmitInt8 - Emit a byte directive and value. +/// Emit a byte directive and value. /// -void AsmPrinter::EmitInt8(int Value) const { +void AsmPrinter::emitInt8(int Value) const { OutStreamer->EmitIntValue(Value, 1); } -/// EmitInt16 - Emit a short directive and value. -void AsmPrinter::EmitInt16(int Value) const { +/// Emit a short directive and value. +void AsmPrinter::emitInt16(int Value) const { OutStreamer->EmitIntValue(Value, 2); } -/// EmitInt32 - Emit a long directive and value. -void AsmPrinter::EmitInt32(int Value) const { +/// Emit a long directive and value. +void AsmPrinter::emitInt32(int Value) const { OutStreamer->EmitIntValue(Value, 4); } +/// Emit a long long directive and value. +void AsmPrinter::emitInt64(uint64_t Value) const { + OutStreamer->EmitIntValue(Value, 8); +} + /// Emit something like ".long Hi-Lo" where the size in bytes of the directive /// is specified by Size and Hi/Lo specify the labels. This implicitly uses /// .set if it avoids relocations. 
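
[Editor's aside -- illustrative only, not part of the patch.] The hunk just above renames the fixed-width helpers to emitInt8/emitInt16/emitInt32 and adds emitInt64, all of which call OutStreamer->EmitIntValue(Value, Size), while the stack-size records earlier in this file's diff pair a fixed-width symbol value with a ULEB128-encoded size. As a rough standalone sketch of those two byte-level encodings (little-endian fixed width, and unsigned LEB128), the following C++ may help; the Emitter type and its byte-vector output are hypothetical stand-ins, not LLVM APIs.

// ---- Illustrative sketch only; not part of the patch. ----
// 'Emitter' is a hypothetical stand-in for an MCStreamer-like object;
// only the two encodings themselves are the point here.
#include <cstdint>
#include <cstdio>
#include <vector>

struct Emitter {
  std::vector<uint8_t> Bytes; // stands in for the assembled output stream

  // Fixed-width little-endian value, as a ".byte"/".short"/".long"/".quad"
  // style directive would produce: emit the low Size bytes, LSB first.
  void emitIntValue(uint64_t Value, unsigned Size) {
    for (unsigned I = 0; I != Size; ++I)
      Bytes.push_back(uint8_t(Value >> (8 * I)));
  }

  // Unsigned LEB128: 7 value bits per byte, high bit set on all but the last.
  void emitULEB128(uint64_t Value) {
    do {
      uint8_t Byte = Value & 0x7f;
      Value >>= 7;
      if (Value != 0)
        Byte |= 0x80; // more bytes follow
      Bytes.push_back(Byte);
    } while (Value != 0);
  }
};

int main() {
  Emitter E;
  E.emitIntValue(0xDEADBEEF, 4); // -> ef be ad de
  E.emitULEB128(624485);         // -> e5 8e 26 (the classic DWARF spec example)
  for (uint8_t B : E.Bytes)
    std::printf("%02x ", B);
  std::printf("\n"); // prints: ef be ad de e5 8e 26
  return 0;
}

In the real code these sit behind MCStreamer and the target asm printers, which may emit textual directives rather than raw bytes; the sketch only shows the encodings the directives ultimately produce.
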
@@ -2069,6 +2194,7 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C, uint64_t Offset = 0); static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP); +static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP); /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the @@ -2146,13 +2272,15 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL, ElementByteSize); } } else { + Type *ET = CDS->getElementType(); for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) - emitGlobalConstantFP(cast<ConstantFP>(CDS->getElementAsConstant(I)), AP); + emitGlobalConstantFP(CDS->getElementAsAPFloat(I), ET, AP); } unsigned Size = DL.getTypeAllocSize(CDS->getType()); unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) * CDS->getNumElements(); + assert(EmittedSize <= Size && "Size cannot be less than EmittedSize!"); if (unsigned Padding = Size - EmittedSize) AP.OutStreamer->EmitZeros(Padding); } @@ -2216,17 +2344,17 @@ static void emitGlobalConstantStruct(const DataLayout &DL, "Layout of constant struct may be incorrect!"); } -static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { - APInt API = CFP->getValueAPF().bitcastToAPInt(); +static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) { + APInt API = APF.bitcastToAPInt(); // First print a comment with what we think the original floating-point value // should have been. if (AP.isVerbose()) { SmallString<8> StrVal; - CFP->getValueAPF().toString(StrVal); + APF.toString(StrVal); - if (CFP->getType()) - CFP->getType()->print(AP.OutStreamer->GetCommentOS()); + if (ET) + ET->print(AP.OutStreamer->GetCommentOS()); else AP.OutStreamer->GetCommentOS() << "Printing <null> Type"; AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n'; @@ -2241,7 +2369,7 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { // PPC's long double has odd notions of endianness compared to how LLVM // handles it: p[0] goes first for *big* endian on PPC. - if (AP.getDataLayout().isBigEndian() && !CFP->getType()->isPPC_FP128Ty()) { + if (AP.getDataLayout().isBigEndian() && !ET->isPPC_FP128Ty()) { int Chunk = API.getNumWords() - 1; if (TrailingBytes) @@ -2260,8 +2388,11 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { // Emit the tail padding for the long double. const DataLayout &DL = AP.getDataLayout(); - AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(CFP->getType()) - - DL.getTypeStoreSize(CFP->getType())); + AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(ET) - DL.getTypeStoreSize(ET)); +} + +static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { + emitGlobalConstantFP(CFP->getValueAPF(), CFP->getType(), AP); } static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { @@ -2320,7 +2451,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { } } -/// \brief Transform a not absolute MCExpr containing a reference to a GOT +/// Transform a not absolute MCExpr containing a reference to a GOT /// equivalent global, by a target specific GOT pc relative access to the /// final symbol. static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, @@ -2533,6 +2664,25 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { /// GetCPISymbol - Return the symbol for the specified constant pool entry. 
MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { + if (getSubtargetInfo().getTargetTriple().isKnownWindowsMSVCEnvironment()) { + const MachineConstantPoolEntry &CPE = + MF->getConstantPool()->getConstants()[CPID]; + if (!CPE.isMachineConstantPoolEntry()) { + const DataLayout &DL = MF->getDataLayout(); + SectionKind Kind = CPE.getSectionKind(&DL); + const Constant *C = CPE.Val.ConstVal; + unsigned Align = CPE.Alignment; + if (const MCSectionCOFF *S = dyn_cast<MCSectionCOFF>( + getObjFileLowering().getSectionForConstant(DL, Kind, C, Align))) { + if (MCSymbol *Sym = S->getCOMDATSymbol()) { + if (Sym->isUndefined()) + OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global); + return Sym; + } + } + } + } + const DataLayout &DL = getDataLayout(); return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) + "_" + @@ -2631,13 +2781,9 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB, MCCodePaddingContext &Context) const { assert(MF != nullptr && "Machine function must be valid"); - assert(LI != nullptr && "Loop info must be valid"); Context.IsPaddingActive = !MF->hasInlineAsm() && !MF->getFunction().optForSize() && TM.getOptLevel() != CodeGenOpt::None; - const MachineLoop *CurrentLoop = LI->getLoopFor(&MBB); - Context.IsBasicBlockInsideInnermostLoop = - CurrentLoop != nullptr && CurrentLoop->getSubLoops().empty(); Context.IsBasicBlockReachableViaFallthrough = std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) != MBB.pred_end(); @@ -2689,7 +2835,9 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { OutStreamer->GetCommentOS() << '\n'; } } - emitBasicBlockLoopComments(MBB, LI, *this); + + assert(MLI != nullptr && "MachineLoopInfo should has been computed"); + emitBasicBlockLoopComments(MBB, MLI, *this); } // Print the main label for the block. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 08eb14e242c5..605588470670 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCRegisterInfo.h" @@ -26,6 +25,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -43,15 +43,6 @@ void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const { OutStreamer->EmitSLEB128IntValue(Value); } -/// EmitULEB128 - emit the specified unsigned leb128 value. -void AsmPrinter::EmitPaddedULEB128(uint64_t Value, unsigned PadTo, - const char *Desc) const { - if (isVerbose() && Desc) - OutStreamer->AddComment(Desc); - - OutStreamer->EmitPaddedULEB128IntValue(Value, PadTo); -} - void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc) const { if (isVerbose() && Desc) OutStreamer->AddComment(Desc); @@ -59,6 +50,12 @@ void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc) const { OutStreamer->EmitULEB128IntValue(Value); } +/// Emit something like ".uleb128 Hi-Lo". 
+void AsmPrinter::EmitLabelDifferenceAsULEB128(const MCSymbol *Hi, + const MCSymbol *Lo) const { + OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo); +} + static const char *DecodeDWARFEncoding(unsigned Encoding) { switch (Encoding) { case dwarf::DW_EH_PE_absptr: @@ -67,6 +64,10 @@ static const char *DecodeDWARFEncoding(unsigned Encoding) { return "omit"; case dwarf::DW_EH_PE_pcrel: return "pcrel"; + case dwarf::DW_EH_PE_uleb128: + return "uleb128"; + case dwarf::DW_EH_PE_sleb128: + return "sleb128"; case dwarf::DW_EH_PE_udata4: return "udata4"; case dwarf::DW_EH_PE_udata8: @@ -167,14 +168,19 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label, EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4); } -void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntryRef S) const { +void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const { if (MAI->doesDwarfUseRelocationsAcrossSections()) { - emitDwarfSymbolReference(S.getSymbol()); + assert(S.Symbol && "No symbol available"); + emitDwarfSymbolReference(S.Symbol); return; } // Just emit the offset directly; no need for symbol math. - EmitInt32(S.getOffset()); + emitInt32(S.Offset); +} + +void AsmPrinter::EmitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const { + EmitLabelPlusOffset(Label, Offset, MAI->getCodePointerSize()); } //===----------------------------------------------------------------------===// @@ -252,7 +258,7 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const { emitDwarfDIE(Child); OutStreamer->AddComment("End Of Children Mark"); - EmitInt8(0); + emitInt8(0); } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h index 638226e90a7a..f5ac95a20b10 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h @@ -27,29 +27,29 @@ class MCSymbol; typedef MCSymbol *ExceptionSymbolProvider(AsmPrinter *Asm); -/// \brief Collects and handles AsmPrinter objects required to build debug +/// Collects and handles AsmPrinter objects required to build debug /// or EH information. class AsmPrinterHandler { public: virtual ~AsmPrinterHandler(); - /// \brief For symbols that have a size designated (e.g. common symbols), + /// For symbols that have a size designated (e.g. common symbols), /// this tracks that size. virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) = 0; - /// \brief Emit all sections that should come after the content. + /// Emit all sections that should come after the content. virtual void endModule() = 0; - /// \brief Gather pre-function debug information. + /// Gather pre-function debug information. /// Every beginFunction(MF) call should be followed by an endFunction(MF) /// call. virtual void beginFunction(const MachineFunction *MF) = 0; - // \brief Emit any of function marker (like .cfi_endproc). This is called + // Emit any of function marker (like .cfi_endproc). This is called // before endFunction and cannot switch sections. virtual void markFunctionEnd(); - /// \brief Gather post-function debug information. + /// Gather post-function debug information. /// Please note that some AsmPrinter implementations may not call /// beginFunction at all. virtual void endFunction(const MachineFunction *MF) = 0; @@ -58,15 +58,15 @@ public: ExceptionSymbolProvider ESP) {} virtual void endFragment() {} - /// \brief Emit target-specific EH funclet machinery. + /// Emit target-specific EH funclet machinery. 
virtual void beginFunclet(const MachineBasicBlock &MBB, MCSymbol *Sym = nullptr) {} virtual void endFunclet() {} - /// \brief Process beginning of an instruction. + /// Process beginning of an instruction. virtual void beginInstruction(const MachineInstr *MI) = 0; - /// \brief Process end of an instruction. + /// Process end of an instruction. virtual void endInstruction() = 0; }; } // End of namespace llvm diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 04a72ba3d738..4159eb19423a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -132,6 +132,9 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, std::unique_ptr<MCAsmParser> Parser( createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum)); + // Do not use assembler-level information for parsing inline assembly. + OutStreamer->setUseAssemblerInfoForParsing(false); + // We create a new MCInstrInfo here since we might be at the module level // and not have a MachineFunction to initialize the TargetInstrInfo from and // we only need MCInstrInfo for asm parsing. We create one unconditionally diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h index aaf6180c9404..2163cc7e3e11 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -43,7 +43,7 @@ public: APByteStreamer(AsmPrinter &Asm) : AP(Asm) {} void EmitInt8(uint8_t Byte, const Twine &Comment) override { AP.OutStreamer->AddComment(Comment); - AP.EmitInt8(Byte); + AP.emitInt8(Byte); } void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { AP.OutStreamer->AddComment(Comment); @@ -76,7 +76,7 @@ private: SmallVectorImpl<char> &Buffer; SmallVectorImpl<std::string> &Comments; - /// \brief Only verbose textual output needs comments. This will be set to + /// Only verbose textual output needs comments. This will be set to /// true for that case, and false otherwise. If false, comments passed in to /// the emit methods will be ignored. bool GenerateComments; @@ -93,15 +93,27 @@ public: } void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { raw_svector_ostream OSE(Buffer); - encodeSLEB128(DWord, OSE); - if (GenerateComments) + unsigned Length = encodeSLEB128(DWord, OSE); + if (GenerateComments) { Comments.push_back(Comment.str()); + // Add some empty comments to keep the Buffer and Comments vectors aligned + // with each other. + for (size_t i = 1; i < Length; ++i) + Comments.push_back(""); + + } } void EmitULEB128(uint64_t DWord, const Twine &Comment) override { raw_svector_ostream OSE(Buffer); - encodeULEB128(DWord, OSE); - if (GenerateComments) + unsigned Length = encodeULEB128(DWord, OSE); + if (GenerateComments) { Comments.push_back(Comment.str()); + // Add some empty comments to keep the Buffer and Comments vectors aligned + // with each other. 
+ for (size_t i = 1; i < Length; ++i) + Comments.push_back(""); + + } } }; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 1d0a003dc50a..8c5c5478d01a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -36,7 +36,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetFrameLowering.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" @@ -75,6 +74,7 @@ #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/ScopedPrinter.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> @@ -114,6 +114,16 @@ StringRef CodeViewDebug::getFullFilepath(const DIFile *File) { StringRef Dir = File->getDirectory(), Filename = File->getFilename(); + // If this is a Unix-style path, just use it as is. Don't try to canonicalize + // it textually because one of the path components could be a symlink. + if (!Dir.empty() && Dir[0] == '/') { + Filepath = Dir; + if (Dir.back() != '/') + Filepath += '/'; + Filepath += Filename; + return Filepath; + } + // Clang emits directory and relative filename info into the IR, but CodeView // operates on full paths. We could change Clang to emit full paths too, but // that would increase the IR size and probably not needed for other users. @@ -165,14 +175,21 @@ unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) { auto Insertion = FileIdMap.insert(std::make_pair(FullPath, NextId)); if (Insertion.second) { // We have to compute the full filepath and emit a .cv_file directive. - std::string Checksum = fromHex(F->getChecksum()); - void *CKMem = OS.getContext().allocate(Checksum.size(), 1); - memcpy(CKMem, Checksum.data(), Checksum.size()); - ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem), - Checksum.size()); - DIFile::ChecksumKind ChecksumKind = F->getChecksumKind(); + ArrayRef<uint8_t> ChecksumAsBytes; + FileChecksumKind CSKind = FileChecksumKind::None; + if (F->getChecksum()) { + std::string Checksum = fromHex(F->getChecksum()->Value); + void *CKMem = OS.getContext().allocate(Checksum.size(), 1); + memcpy(CKMem, Checksum.data(), Checksum.size()); + ChecksumAsBytes = ArrayRef<uint8_t>( + reinterpret_cast<const uint8_t *>(CKMem), Checksum.size()); + switch (F->getChecksum()->Kind) { + case DIFile::CSK_MD5: CSKind = FileChecksumKind::MD5; break; + case DIFile::CSK_SHA1: CSKind = FileChecksumKind::SHA1; break; + } + } bool Success = OS.EmitCVFileDirective(NextId, FullPath, ChecksumAsBytes, - static_cast<unsigned>(ChecksumKind)); + static_cast<unsigned>(CSKind)); (void)Success; assert(Success && ".cv_file directive failed"); } @@ -358,15 +375,15 @@ unsigned CodeViewDebug::getPointerSizeInBytes() { } void CodeViewDebug::recordLocalVariable(LocalVariable &&Var, - const DILocation *InlinedAt) { - if (InlinedAt) { + const LexicalScope *LS) { + if (const DILocation *InlinedAt = LS->getInlinedAt()) { // This variable was inlined. Associate it with the InlineSite. const DISubprogram *Inlinee = Var.DIVar->getScope()->getSubprogram(); InlineSite &Site = getInlineSite(InlinedAt, Inlinee); Site.InlinedLocals.emplace_back(Var); } else { - // This variable goes in the main ProcSym. 
- CurFn->Locals.emplace_back(Var); + // This variable goes into the corresponding lexical scope. + ScopeVariables[LS].emplace_back(Var); } } @@ -463,7 +480,7 @@ void CodeViewDebug::endModule() { // Emit per-function debug information. for (auto &P : FnDebugInfo) if (!P.first->isDeclarationForLinker()) - emitDebugInfoForFunction(P.first, P.second); + emitDebugInfoForFunction(P.first, *P.second); // Emit global variable debug information. setCurrentSubprogram(nullptr); @@ -501,12 +518,12 @@ void CodeViewDebug::endModule() { clear(); } -static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S) { +static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S, + unsigned MaxFixedRecordLength = 0xF00) { // The maximum CV record length is 0xFF00. Most of the strings we emit appear // after a fixed length portion of the record. The fixed length portion should // always be less than 0xF00 (3840) bytes, so truncate the string so that the // overall record size is less than the maximum allowed. - unsigned MaxFixedRecordLength = 0xF00; SmallString<32> NullTerminatedString( S.take_front(MaxRecordLength - MaxFixedRecordLength - 1)); NullTerminatedString.push_back('\0'); @@ -517,7 +534,7 @@ void CodeViewDebug::emitTypeInformation() { if (TypeTable.empty()) return; - // Start the .debug$T section with 0x4. + // Start the .debug$T or .debug$P section with 0x4. OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection()); emitCodeViewMagicVersion(); @@ -572,7 +589,7 @@ void CodeViewDebug::emitTypeGlobalHashes() { OS.AddComment("Section Version"); OS.EmitIntValue(0, 2); OS.AddComment("Hash Algorithm"); - OS.EmitIntValue(uint16_t(GlobalTypeHashAlg::SHA1), 2); + OS.EmitIntValue(uint16_t(GlobalTypeHashAlg::SHA1_8), 2); TypeIndex TI(TypeIndex::FirstNonSimpleIndex); for (const auto &GHR : TypeTable.hashes()) { @@ -585,7 +602,7 @@ void CodeViewDebug::emitTypeGlobalHashes() { OS.AddComment(Comment); ++TI; } - assert(GHR.Hash.size() % 20 == 0); + assert(GHR.Hash.size() == 8); StringRef S(reinterpret_cast<const char *>(GHR.Hash.data()), GHR.Hash.size()); OS.EmitBinaryData(S); @@ -821,10 +838,61 @@ void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) { emitCodeViewMagicVersion(); } +// Emit an S_THUNK32/S_END symbol pair for a thunk routine. +// The only supported thunk ordinal is currently the standard type. +void CodeViewDebug::emitDebugInfoForThunk(const Function *GV, + FunctionInfo &FI, + const MCSymbol *Fn) { + std::string FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName()); + const ThunkOrdinal ordinal = ThunkOrdinal::Standard; // Only supported kind. 
+ + OS.AddComment("Symbol subsection for " + Twine(FuncName)); + MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols); + + // Emit S_THUNK32 + MCSymbol *ThunkRecordBegin = MMI->getContext().createTempSymbol(), + *ThunkRecordEnd = MMI->getContext().createTempSymbol(); + OS.AddComment("Record length"); + OS.emitAbsoluteSymbolDiff(ThunkRecordEnd, ThunkRecordBegin, 2); + OS.EmitLabel(ThunkRecordBegin); + OS.AddComment("Record kind: S_THUNK32"); + OS.EmitIntValue(unsigned(SymbolKind::S_THUNK32), 2); + OS.AddComment("PtrParent"); + OS.EmitIntValue(0, 4); + OS.AddComment("PtrEnd"); + OS.EmitIntValue(0, 4); + OS.AddComment("PtrNext"); + OS.EmitIntValue(0, 4); + OS.AddComment("Thunk section relative address"); + OS.EmitCOFFSecRel32(Fn, /*Offset=*/0); + OS.AddComment("Thunk section index"); + OS.EmitCOFFSectionIndex(Fn); + OS.AddComment("Code size"); + OS.emitAbsoluteSymbolDiff(FI.End, Fn, 2); + OS.AddComment("Ordinal"); + OS.EmitIntValue(unsigned(ordinal), 1); + OS.AddComment("Function name"); + emitNullTerminatedSymbolName(OS, FuncName); + // Additional fields specific to the thunk ordinal would go here. + OS.EmitLabel(ThunkRecordEnd); + + // Local variables/inlined routines are purposely omitted here. The point of + // marking this as a thunk is so Visual Studio will NOT stop in this routine. + + // Emit S_PROC_ID_END + const unsigned RecordLengthForSymbolEnd = 2; + OS.AddComment("Record length"); + OS.EmitIntValue(RecordLengthForSymbolEnd, 2); + OS.AddComment("Record kind: S_PROC_ID_END"); + OS.EmitIntValue(unsigned(SymbolKind::S_PROC_ID_END), 2); + + endCVSubsection(SymbolsEnd); +} + void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, FunctionInfo &FI) { - // For each function there is a separate subsection - // which holds the PC to file:line table. + // For each function there is a separate subsection which holds the PC to + // file:line table. const MCSymbol *Fn = Asm->getSymbol(GV); assert(Fn); @@ -836,6 +904,11 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, assert(SP); setCurrentSubprogram(SP); + if (SP->isThunk()) { + emitDebugInfoForThunk(GV, FI, Fn); + return; + } + // If we have a display name, build the fully qualified name by walking the // chain of scopes. if (!SP->getName().empty()) @@ -898,6 +971,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, OS.EmitLabel(ProcRecordEnd); emitLocalVariableList(FI.Locals); + emitLexicalBlockList(FI.ChildBlocks, FI); // Emit inlined call site information. Only emit functions inlined directly // into the parent function. 
We'll emit the other sites recursively as part @@ -1018,7 +1092,7 @@ void CodeViewDebug::collectVariableInfoFromMFTable( LocalVariable Var; Var.DIVar = VI.Var; Var.DefRanges.emplace_back(std::move(DefRange)); - recordLocalVariable(std::move(Var), VI.Loc->getInlinedAt()); + recordLocalVariable(std::move(Var), Scope); } } @@ -1100,7 +1174,7 @@ void CodeViewDebug::calculateRanges( auto J = std::next(I); const DIExpression *DIExpr = DVInst->getDebugExpression(); while (J != E && - !fragmentsOverlap(DIExpr, J->first->getDebugExpression())) + !DIExpr->fragmentsOverlap(J->first->getDebugExpression())) ++J; if (J != E) End = getLabelBeforeInsn(J->first); @@ -1149,14 +1223,15 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { Var.DIVar = DIVar; calculateRanges(Var, Ranges); - recordLocalVariable(std::move(Var), InlinedAt); + recordLocalVariable(std::move(Var), Scope); } } void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { const Function &GV = MF->getFunction(); - assert(FnDebugInfo.count(&GV) == false); - CurFn = &FnDebugInfo[&GV]; + auto Insertion = FnDebugInfo.insert({&GV, llvm::make_unique<FunctionInfo>()}); + assert(Insertion.second && "function already has info"); + CurFn = Insertion.first->second.get(); CurFn->FuncId = NextFuncId++; CurFn->Begin = Asm->getFunctionBegin(); @@ -1261,6 +1336,7 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) { return lowerTypePointer(cast<DIDerivedType>(Ty)); case dwarf::DW_TAG_ptr_to_member_type: return lowerTypeMemberPointer(cast<DIDerivedType>(Ty)); + case dwarf::DW_TAG_restrict_type: case dwarf::DW_TAG_const_type: case dwarf::DW_TAG_volatile_type: // TODO: add support for DW_TAG_atomic_type here @@ -1281,6 +1357,8 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) { return lowerTypeClass(cast<DICompositeType>(Ty)); case dwarf::DW_TAG_union_type: return lowerTypeUnion(cast<DICompositeType>(Ty)); + case dwarf::DW_TAG_unspecified_type: + return TypeIndex::None(); default: // Use the null type index. return TypeIndex(); @@ -1308,7 +1386,7 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { DITypeRef ElementTypeRef = Ty->getBaseType(); TypeIndex ElementTypeIndex = getTypeIndex(ElementTypeRef); // IndexType is size_t, which depends on the bitness of the target. - TypeIndex IndexType = Asm->TM.getPointerSize() == 8 + TypeIndex IndexType = getPointerSizeInBytes() == 8 ? TypeIndex(SimpleTypeKind::UInt64Quad) : TypeIndex(SimpleTypeKind::UInt32Long); @@ -1323,7 +1401,9 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { const DISubrange *Subrange = cast<DISubrange>(Element); assert(Subrange->getLowerBound() == 0 && "codeview doesn't support subranges with lower bounds"); - int64_t Count = Subrange->getCount(); + int64_t Count = -1; + if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt*>()) + Count = CI->getSExtValue(); // Forward declarations of arrays without a size and VLAs use a count of -1. // Emit a count of zero in these cases to match what MSVC does for arrays @@ -1441,12 +1521,13 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) { return TypeIndex(STK); } -TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) { +TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty, + PointerOptions PO) { TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType()); - // Pointers to simple types can use SimpleTypeMode, rather than having a - // dedicated pointer type record. 
- if (PointeeTI.isSimple() && + // Pointers to simple types without any options can use SimpleTypeMode, rather + // than having a dedicated pointer type record. + if (PointeeTI.isSimple() && PO == PointerOptions::None && PointeeTI.getSimpleMode() == SimpleTypeMode::Direct && Ty->getTag() == dwarf::DW_TAG_pointer_type) { SimpleTypeMode Mode = Ty->getSizeInBits() == 64 @@ -1470,10 +1551,7 @@ TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) { PM = PointerMode::RValueReference; break; } - // FIXME: MSVC folds qualifiers into PointerOptions in the context of a method - // 'this' pointer, but not normal contexts. Figure out what we're supposed to - // do. - PointerOptions PO = PointerOptions::None; + PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8); return TypeTable.writeLeafType(PR); } @@ -1511,16 +1589,17 @@ translatePtrToMemberRep(unsigned SizeInBytes, bool IsPMF, unsigned Flags) { llvm_unreachable("invalid ptr to member representation"); } -TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) { +TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty, + PointerOptions PO) { assert(Ty->getTag() == dwarf::DW_TAG_ptr_to_member_type); TypeIndex ClassTI = getTypeIndex(Ty->getClassType()); TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType(), Ty->getClassType()); - PointerKind PK = Asm->TM.getPointerSize() == 8 ? PointerKind::Near64 - : PointerKind::Near32; + PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64 + : PointerKind::Near32; bool IsPMF = isa<DISubroutineType>(Ty->getBaseType()); PointerMode PM = IsPMF ? PointerMode::PointerToMemberFunction : PointerMode::PointerToDataMember; - PointerOptions PO = PointerOptions::None; // FIXME + assert(Ty->getSizeInBits() / 8 <= 0xff && "pointer size too big"); uint8_t SizeInBytes = Ty->getSizeInBits() / 8; MemberPointerInfo MPI( @@ -1545,6 +1624,7 @@ static CallingConvention dwarfCCToCodeView(unsigned DwarfCC) { TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) { ModifierOptions Mods = ModifierOptions::None; + PointerOptions PO = PointerOptions::None; bool IsModifier = true; const DIType *BaseTy = Ty; while (IsModifier && BaseTy) { @@ -1552,9 +1632,16 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) { switch (BaseTy->getTag()) { case dwarf::DW_TAG_const_type: Mods |= ModifierOptions::Const; + PO |= PointerOptions::Const; break; case dwarf::DW_TAG_volatile_type: Mods |= ModifierOptions::Volatile; + PO |= PointerOptions::Volatile; + break; + case dwarf::DW_TAG_restrict_type: + // Only pointer types be marked with __restrict. There is no known flag + // for __restrict in LF_MODIFIER records. + PO |= PointerOptions::Restrict; break; default: IsModifier = false; @@ -1563,7 +1650,31 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) { if (IsModifier) BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType().resolve(); } + + // Check if the inner type will use an LF_POINTER record. If so, the + // qualifiers will go in the LF_POINTER record. This comes up for types like + // 'int *const' and 'int *__restrict', not the more common cases like 'const + // char *'. 
+ if (BaseTy) { + switch (BaseTy->getTag()) { + case dwarf::DW_TAG_pointer_type: + case dwarf::DW_TAG_reference_type: + case dwarf::DW_TAG_rvalue_reference_type: + return lowerTypePointer(cast<DIDerivedType>(BaseTy), PO); + case dwarf::DW_TAG_ptr_to_member_type: + return lowerTypeMemberPointer(cast<DIDerivedType>(BaseTy), PO); + default: + break; + } + } + TypeIndex ModifiedTI = getTypeIndex(BaseTy); + + // Return the base type index if there aren't any modifiers. For example, the + // metadata could contain restrict wrappers around non-pointer types. + if (Mods == ModifierOptions::None) + return ModifiedTI; + ModifierRecord MR(ModifiedTI, Mods); return TypeTable.writeLeafType(MR); } @@ -1573,6 +1684,11 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) { for (DITypeRef ArgTypeRef : Ty->getTypeArray()) ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef)); + // MSVC uses type none for variadic argument. + if (ReturnAndArgTypeIndices.size() > 1 && + ReturnAndArgTypeIndices.back() == TypeIndex::Void()) { + ReturnAndArgTypeIndices.back() = TypeIndex::None(); + } TypeIndex ReturnTypeIndex = TypeIndex::Void(); ArrayRef<TypeIndex> ArgTypeIndices = None; if (!ReturnAndArgTypeIndices.empty()) { @@ -1602,6 +1718,11 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, for (DITypeRef ArgTypeRef : Ty->getTypeArray()) ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef)); + // MSVC uses type none for variadic argument. + if (ReturnAndArgTypeIndices.size() > 1 && + ReturnAndArgTypeIndices.back() == TypeIndex::Void()) { + ReturnAndArgTypeIndices.back() = TypeIndex::None(); + } TypeIndex ReturnTypeIndex = TypeIndex::Void(); ArrayRef<TypeIndex> ArgTypeIndices = None; if (!ReturnAndArgTypeIndices.empty()) { @@ -1716,6 +1837,26 @@ static ClassOptions getCommonClassOptions(const DICompositeType *Ty) { return CO; } +void CodeViewDebug::addUDTSrcLine(const DIType *Ty, TypeIndex TI) { + switch (Ty->getTag()) { + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_enumeration_type: + break; + default: + return; + } + + if (const auto *File = Ty->getFile()) { + StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(File)); + TypeIndex SIDI = TypeTable.writeLeafType(SIDR); + + UdtSourceLineRecord USLR(TI, SIDI, Ty->getLine()); + TypeTable.writeLeafType(USLR); + } +} + TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) { ClassOptions CO = getCommonClassOptions(Ty); TypeIndex FTI; @@ -1744,7 +1885,11 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) { EnumRecord ER(EnumeratorCount, CO, FTI, FullName, Ty->getIdentifier(), getTypeIndex(Ty->getBaseType())); - return TypeTable.writeLeafType(ER); + TypeIndex EnumTI = TypeTable.writeLeafType(ER); + + addUDTSrcLine(Ty, EnumTI); + + return EnumTI; } //===----------------------------------------------------------------------===// @@ -1793,12 +1938,33 @@ void CodeViewDebug::collectMemberInfo(ClassInfo &Info, Info.Members.push_back({DDTy, 0}); return; } - // An unnamed member must represent a nested struct or union. Add all the - // indirect fields to the current record. + + // An unnamed member may represent a nested struct or union. Attempt to + // interpret the unnamed member as a DICompositeType possibly wrapped in + // qualifier types. Add all the indirect fields to the current record if that + // succeeds, and drop the member if that fails. 
assert((DDTy->getOffsetInBits() % 8) == 0 && "Unnamed bitfield member!"); uint64_t Offset = DDTy->getOffsetInBits(); const DIType *Ty = DDTy->getBaseType().resolve(); - const DICompositeType *DCTy = cast<DICompositeType>(Ty); + bool FullyResolved = false; + while (!FullyResolved) { + switch (Ty->getTag()) { + case dwarf::DW_TAG_const_type: + case dwarf::DW_TAG_volatile_type: + // FIXME: we should apply the qualifier types to the indirect fields + // rather than dropping them. + Ty = cast<DIDerivedType>(Ty)->getBaseType().resolve(); + break; + default: + FullyResolved = true; + break; + } + } + + const DICompositeType *DCTy = dyn_cast<DICompositeType>(Ty); + if (!DCTy) + return; + ClassInfo NestedInfo = collectClassInfo(DCTy); for (const ClassInfo::MemberInfo &IndirectField : NestedInfo.Members) Info.Members.push_back( @@ -1838,7 +2004,28 @@ ClassInfo CodeViewDebug::collectClassInfo(const DICompositeType *Ty) { return Info; } +static bool shouldAlwaysEmitCompleteClassType(const DICompositeType *Ty) { + // This routine is used by lowerTypeClass and lowerTypeUnion to determine + // if a complete type should be emitted instead of a forward reference. + return Ty->getName().empty() && Ty->getIdentifier().empty() && + !Ty->isForwardDecl(); +} + TypeIndex CodeViewDebug::lowerTypeClass(const DICompositeType *Ty) { + // Emit the complete type for unnamed structs. C++ classes with methods + // which have a circular reference back to the class type are expected to + // be named by the front-end and should not be "unnamed". C unnamed + // structs should not have circular references. + if (shouldAlwaysEmitCompleteClassType(Ty)) { + // If this unnamed complete type is already in the process of being defined + // then the description of the type is malformed and cannot be emitted + // into CodeView correctly so report a fatal error. + auto I = CompleteTypeIndices.find(Ty); + if (I != CompleteTypeIndices.end() && I->second == TypeIndex()) + report_fatal_error("cannot debug circular reference to unnamed type"); + return getCompleteTypeIndex(Ty); + } + // First, construct the forward decl. Don't look into Ty to compute the // forward decl options, since it might not be available in all TUs. TypeRecordKind Kind = getRecordKind(Ty); @@ -1875,13 +2062,7 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) { SizeInBytes, FullName, Ty->getIdentifier()); TypeIndex ClassTI = TypeTable.writeLeafType(CR); - if (const auto *File = Ty->getFile()) { - StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(File)); - TypeIndex SIDI = TypeTable.writeLeafType(SIDR); - - UdtSourceLineRecord USLR(ClassTI, SIDI, Ty->getLine()); - TypeTable.writeLeafType(USLR); - } + addUDTSrcLine(Ty, ClassTI); addToUDTs(Ty); @@ -1889,6 +2070,10 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) { } TypeIndex CodeViewDebug::lowerTypeUnion(const DICompositeType *Ty) { + // Emit the complete type for unnamed unions. 
+ if (shouldAlwaysEmitCompleteClassType(Ty)) + return getCompleteTypeIndex(Ty); + ClassOptions CO = ClassOptions::ForwardReference | getCommonClassOptions(Ty); std::string FullName = getFullyQualifiedName(Ty); @@ -1917,11 +2102,7 @@ TypeIndex CodeViewDebug::lowerCompleteTypeUnion(const DICompositeType *Ty) { Ty->getIdentifier()); TypeIndex UnionTI = TypeTable.writeLeafType(UR); - StringIdRecord SIR(TypeIndex(0x0), getFullFilepath(Ty->getFile())); - TypeIndex SIRI = TypeTable.writeLeafType(SIR); - - UdtSourceLineRecord USLR(UnionTI, SIRI, Ty->getLine()); - TypeTable.writeLeafType(USLR); + addUDTSrcLine(Ty, UnionTI); addToUDTs(Ty); @@ -1943,8 +2124,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { for (const DIDerivedType *I : Info.Inheritance) { if (I->getFlags() & DINode::FlagVirtual) { // Virtual base. - // FIXME: Emit VBPtrOffset when the frontend provides it. - unsigned VBPtrOffset = 0; + unsigned VBPtrOffset = I->getVBPtrOffset(); // FIXME: Despite the accessor name, the offset is really in bytes. unsigned VBTableIndex = I->getOffsetInBits() / 4; auto RecordKind = (I->getFlags() & DINode::FlagIndirectVirtualBase) == DINode::FlagIndirectVirtualBase @@ -1956,6 +2136,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { VBTableIndex); ContinuationBuilder.writeMemberType(VBCR); + MemberCount++; } else { assert(I->getOffsetInBits() % 8 == 0 && "bases must be on byte boundaries"); @@ -1963,6 +2144,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { getTypeIndex(I->getBaseType()), I->getOffsetInBits() / 8); ContinuationBuilder.writeMemberType(BCR); + MemberCount++; } } @@ -2121,9 +2303,7 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) { return getTypeIndex(Ty); } - // Check if we've already translated the complete record type. Lowering a - // complete type should never trigger lowering another complete type, so we - // can reuse the hash table lookup result. + // Check if we've already translated the complete record type. const auto *CTy = cast<DICompositeType>(Ty); auto InsertResult = CompleteTypeIndices.insert({CTy, TypeIndex()}); if (!InsertResult.second) @@ -2134,13 +2314,16 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) { // Make sure the forward declaration is emitted first. It's unclear if this // is necessary, but MSVC does it, and we should follow suit until we can show // otherwise. - TypeIndex FwdDeclTI = getTypeIndex(CTy); + // We only emit a forward declaration for named types. + if (!CTy->getName().empty() || !CTy->getIdentifier().empty()) { + TypeIndex FwdDeclTI = getTypeIndex(CTy); - // Just use the forward decl if we don't have complete type info. This might - // happen if the frontend is using modules and expects the complete definition - // to be emitted elsewhere. - if (CTy->isForwardDecl()) - return FwdDeclTI; + // Just use the forward decl if we don't have complete type info. This + // might happen if the frontend is using modules and expects the complete + // definition to be emitted elsewhere. + if (CTy->isForwardDecl()) + return FwdDeclTI; + } TypeIndex TI; switch (CTy->getTag()) { @@ -2155,7 +2338,11 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) { llvm_unreachable("not a record"); } - InsertResult.first->second = TI; + // Update the type index associated with this CompositeType. This cannot + // use the 'InsertResult' iterator above because it is potentially + // invalidated by map insertions which can occur while lowering the class + // type above. 
+ CompleteTypeIndices[CTy] = TI; return TI; } @@ -2179,10 +2366,10 @@ void CodeViewDebug::emitLocalVariableList(ArrayRef<LocalVariable> Locals) { for (const LocalVariable &L : Locals) if (L.DIVar->isParameter()) Params.push_back(&L); - std::sort(Params.begin(), Params.end(), - [](const LocalVariable *L, const LocalVariable *R) { - return L->DIVar->getArg() < R->DIVar->getArg(); - }); + llvm::sort(Params.begin(), Params.end(), + [](const LocalVariable *L, const LocalVariable *R) { + return L->DIVar->getArg() < R->DIVar->getArg(); + }); for (const LocalVariable *L : Params) emitLocalVariable(*L); @@ -2272,15 +2459,150 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) { } } +void CodeViewDebug::emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks, + const FunctionInfo& FI) { + for (LexicalBlock *Block : Blocks) + emitLexicalBlock(*Block, FI); +} + +/// Emit an S_BLOCK32 and S_END record pair delimiting the contents of a +/// lexical block scope. +void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block, + const FunctionInfo& FI) { + MCSymbol *RecordBegin = MMI->getContext().createTempSymbol(), + *RecordEnd = MMI->getContext().createTempSymbol(); + + // Lexical block symbol record. + OS.AddComment("Record length"); + OS.emitAbsoluteSymbolDiff(RecordEnd, RecordBegin, 2); // Record Length + OS.EmitLabel(RecordBegin); + OS.AddComment("Record kind: S_BLOCK32"); + OS.EmitIntValue(SymbolKind::S_BLOCK32, 2); // Record Kind + OS.AddComment("PtrParent"); + OS.EmitIntValue(0, 4); // PtrParent + OS.AddComment("PtrEnd"); + OS.EmitIntValue(0, 4); // PtrEnd + OS.AddComment("Code size"); + OS.emitAbsoluteSymbolDiff(Block.End, Block.Begin, 4); // Code Size + OS.AddComment("Function section relative address"); + OS.EmitCOFFSecRel32(Block.Begin, /*Offset=*/0); // Func Offset + OS.AddComment("Function section index"); + OS.EmitCOFFSectionIndex(FI.Begin); // Func Symbol + OS.AddComment("Lexical block name"); + emitNullTerminatedSymbolName(OS, Block.Name); // Name + OS.EmitLabel(RecordEnd); + + // Emit variables local to this lexical block. + emitLocalVariableList(Block.Locals); + + // Emit lexical blocks contained within this block. + emitLexicalBlockList(Block.Children, FI); + + // Close the lexical block scope. + OS.AddComment("Record length"); + OS.EmitIntValue(2, 2); // Record Length + OS.AddComment("Record kind: S_END"); + OS.EmitIntValue(SymbolKind::S_END, 2); // Record Kind +} + +/// Convenience routine for collecting lexical block information for a list +/// of lexical scopes. +void CodeViewDebug::collectLexicalBlockInfo( + SmallVectorImpl<LexicalScope *> &Scopes, + SmallVectorImpl<LexicalBlock *> &Blocks, + SmallVectorImpl<LocalVariable> &Locals) { + for (LexicalScope *Scope : Scopes) + collectLexicalBlockInfo(*Scope, Blocks, Locals); +} + +/// Populate the lexical blocks and local variable lists of the parent with +/// information about the specified lexical scope. +void CodeViewDebug::collectLexicalBlockInfo( + LexicalScope &Scope, + SmallVectorImpl<LexicalBlock *> &ParentBlocks, + SmallVectorImpl<LocalVariable> &ParentLocals) { + if (Scope.isAbstractScope()) + return; + + auto LocalsIter = ScopeVariables.find(&Scope); + if (LocalsIter == ScopeVariables.end()) { + // This scope does not contain variables and can be eliminated. 
+ collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals); + return; + } + SmallVectorImpl<LocalVariable> &Locals = LocalsIter->second; + + const DILexicalBlock *DILB = dyn_cast<DILexicalBlock>(Scope.getScopeNode()); + if (!DILB) { + // This scope is not a lexical block and can be eliminated, but keep any + // local variables it contains. + ParentLocals.append(Locals.begin(), Locals.end()); + collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals); + return; + } + + const SmallVectorImpl<InsnRange> &Ranges = Scope.getRanges(); + if (Ranges.size() != 1 || !getLabelAfterInsn(Ranges.front().second)) { + // This lexical block scope has too many address ranges to represent in the + // current CodeView format or does not have a valid address range. + // Eliminate this lexical scope and promote any locals it contains to the + // parent scope. + // + // For lexical scopes with multiple address ranges you may be tempted to + // construct a single range covering every instruction where the block is + // live and everything in between. Unfortunately, Visual Studio only + // displays variables from the first matching lexical block scope. If the + // first lexical block contains exception handling code or cold code which + // is moved to the bottom of the routine creating a single range covering + // nearly the entire routine, then it will hide all other lexical blocks + // and the variables they contain. + // + ParentLocals.append(Locals.begin(), Locals.end()); + collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals); + return; + } + + // Create a new CodeView lexical block for this lexical scope. If we've + // seen this DILexicalBlock before then the scope tree is malformed and + // we can handle this gracefully by not processing it a second time. + auto BlockInsertion = CurFn->LexicalBlocks.insert({DILB, LexicalBlock()}); + if (!BlockInsertion.second) + return; + + // Create a lexical block containing the local variables and collect the + // the lexical block information for the children. + const InsnRange &Range = Ranges.front(); + assert(Range.first && Range.second); + LexicalBlock &Block = BlockInsertion.first->second; + Block.Begin = getLabelBeforeInsn(Range.first); + Block.End = getLabelAfterInsn(Range.second); + assert(Block.Begin && "missing label for scope begin"); + assert(Block.End && "missing label for scope end"); + Block.Name = DILB->getName(); + Block.Locals = std::move(Locals); + ParentBlocks.push_back(&Block); + collectLexicalBlockInfo(Scope.getChildren(), Block.Children, Block.Locals); +} + void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) { const Function &GV = MF->getFunction(); assert(FnDebugInfo.count(&GV)); - assert(CurFn == &FnDebugInfo[&GV]); + assert(CurFn == FnDebugInfo[&GV].get()); collectVariableInfo(GV.getSubprogram()); + // Build the lexical block structure to emit for this routine. + if (LexicalScope *CFS = LScopes.getCurrentFunctionScope()) + collectLexicalBlockInfo(*CFS, CurFn->ChildBlocks, CurFn->Locals); + + // Clear the scope and variable information from the map which will not be + // valid after we have finished processing this routine. This also prepares + // the map for the subsequent routine. + ScopeVariables.clear(); + // Don't emit anything if we don't have any line tables. - if (!CurFn->HaveLineInfo) { + // Thunks are compiler-generated and probably won't have source correlation. 
+ if (!CurFn->HaveLineInfo && !GV.getSubprogram()->isThunk()) { FnDebugInfo.erase(&GV); CurFn = nullptr; return; @@ -2296,8 +2618,8 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) { void CodeViewDebug::beginInstruction(const MachineInstr *MI) { DebugHandlerBase::beginInstruction(MI); - // Ignore DBG_VALUE locations and function prologue. - if (!Asm || !CurFn || MI->isDebugValue() || + // Ignore DBG_VALUE and DBG_LABEL locations and function prologue. + if (!Asm || !CurFn || MI->isDebugInstr() || MI->getFlag(MachineInstr::FrameSetup)) return; @@ -2306,7 +2628,7 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) { DebugLoc DL = MI->getDebugLoc(); if (!DL && MI->getParent() != PrevInstBB) { for (const auto &NextMI : *MI->getParent()) { - if (NextMI.isDebugValue()) + if (NextMI.isDebugInstr()) continue; DL = NextMI.getDebugLoc(); if (DL) @@ -2432,6 +2754,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, // FIXME: Thread local data, etc MCSymbol *DataBegin = MMI->getContext().createTempSymbol(), *DataEnd = MMI->getContext().createTempSymbol(); + const unsigned FixedLengthOfThisRecord = 12; OS.AddComment("Record length"); OS.emitAbsoluteSymbolDiff(DataEnd, DataBegin, 2); OS.EmitLabel(DataBegin); @@ -2459,6 +2782,6 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, OS.AddComment("Segment"); OS.EmitCOFFSectionIndex(GVSym); OS.AddComment("Name"); - emitNullTerminatedSymbolName(OS, DIGV->getName()); + emitNullTerminatedSymbolName(OS, DIGV->getName(), FixedLengthOfThisRecord); OS.EmitLabel(DataEnd); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 69e93640d7ef..6a0da5f993d0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -48,7 +48,7 @@ class MCStreamer; class MCSymbol; class MachineFunction; -/// \brief Collects and handles line tables information in a CodeView format. +/// Collects and handles line tables information in a CodeView format. class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { MCStreamer &OS; BumpPtrAllocator Allocator; @@ -107,9 +107,23 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { unsigned SiteFuncId = 0; }; + // Combines information from DILexicalBlock and LexicalScope. + struct LexicalBlock { + SmallVector<LocalVariable, 1> Locals; + SmallVector<LexicalBlock *, 1> Children; + const MCSymbol *Begin; + const MCSymbol *End; + StringRef Name; + }; + // For each function, store a vector of labels to its instructions, as well as // to the end of the function. struct FunctionInfo { + FunctionInfo() = default; + + // Uncopyable. + FunctionInfo(const FunctionInfo &FI) = delete; + /// Map from inlined call site to inlined instructions and child inlined /// call sites. Listed in program order. std::unordered_map<const DILocation *, InlineSite> InlineSites; @@ -119,6 +133,11 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { SmallVector<LocalVariable, 1> Locals; + std::unordered_map<const DILexicalBlockBase*, LexicalBlock> LexicalBlocks; + + // Lexical blocks containing local variables. 
+ SmallVector<LexicalBlock *, 1> ChildBlocks; + std::vector<std::pair<MCSymbol *, MDNode *>> Annotations; const MCSymbol *Begin = nullptr; @@ -129,6 +148,12 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { }; FunctionInfo *CurFn = nullptr; + // Map used to seperate variables according to the lexical scope they belong + // in. This is populated by recordLocalVariable() before + // collectLexicalBlocks() separates the variables between the FunctionInfo + // and LexicalBlocks. + DenseMap<const LexicalScope *, SmallVector<LocalVariable, 1>> ScopeVariables; + /// The set of comdat .debug$S sections that we've seen so far. Each section /// must start with a magic version number that must only be emitted once. /// This set tracks which sections we've already opened. @@ -159,7 +184,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// Remember some debug info about each function. Keep it in a stable order to /// emit at the end of the TU. - MapVector<const Function *, FunctionInfo> FnDebugInfo; + MapVector<const Function *, std::unique_ptr<FunctionInfo>> FnDebugInfo; /// Map from full file path to .cv_file id. Full paths are built from DIFiles /// and are stored in FileToFilepathMap; @@ -200,7 +225,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { using FileToFilepathMapTy = std::map<const DIFile *, std::string>; FileToFilepathMapTy FileToFilepathMap; - StringRef getFullFilepath(const DIFile *S); + StringRef getFullFilepath(const DIFile *File); unsigned maybeRecordFile(const DIFile *F); @@ -214,7 +239,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { } /// Emit the magic version number at the start of a CodeView type or symbol - /// section. Appears at the front of every .debug$S or .debug$T section. + /// section. Appears at the front of every .debug$S or .debug$T or .debug$P + /// section. void emitCodeViewMagicVersion(); void emitTypeInformation(); @@ -225,6 +251,10 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void emitInlineeLinesSubsection(); + void emitDebugInfoForThunk(const Function *GV, + FunctionInfo &FI, + const MCSymbol *Fn); + void emitDebugInfoForFunction(const Function *GV, FunctionInfo &FI); void emitDebugInfoForGlobals(); @@ -253,9 +283,18 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &Processed); + // Construct the lexical block tree for a routine, pruning emptpy lexical + // scopes, and populate it with local variables. + void collectLexicalBlockInfo(SmallVectorImpl<LexicalScope *> &Scopes, + SmallVectorImpl<LexicalBlock *> &Blocks, + SmallVectorImpl<LocalVariable> &Locals); + void collectLexicalBlockInfo(LexicalScope &Scope, + SmallVectorImpl<LexicalBlock *> &ParentBlocks, + SmallVectorImpl<LocalVariable> &ParentLocals); + /// Records information about a local variable in the appropriate scope. In /// particular, locals from inlined code live inside the inlining site. - void recordLocalVariable(LocalVariable &&Var, const DILocation *Loc); + void recordLocalVariable(LocalVariable &&Var, const LexicalScope *LS); /// Emits local variables in the appropriate order. void emitLocalVariableList(ArrayRef<LocalVariable> Locals); @@ -263,6 +302,13 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// Emits an S_LOCAL record and its associated defined ranges. 
void emitLocalVariable(const LocalVariable &Var); + /// Emits a sequence of lexical block scopes and their children. + void emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks, + const FunctionInfo& FI); + + /// Emit a lexical block scope and its children. + void emitLexicalBlock(const LexicalBlock &Block, const FunctionInfo& FI); + /// Translates the DIType to codeview if necessary and returns a type index /// for it. codeview::TypeIndex getTypeIndex(DITypeRef TypeRef, @@ -279,12 +325,18 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void addToUDTs(const DIType *Ty); + void addUDTSrcLine(const DIType *Ty, codeview::TypeIndex TI); + codeview::TypeIndex lowerType(const DIType *Ty, const DIType *ClassTy); codeview::TypeIndex lowerTypeAlias(const DIDerivedType *Ty); codeview::TypeIndex lowerTypeArray(const DICompositeType *Ty); codeview::TypeIndex lowerTypeBasic(const DIBasicType *Ty); - codeview::TypeIndex lowerTypePointer(const DIDerivedType *Ty); - codeview::TypeIndex lowerTypeMemberPointer(const DIDerivedType *Ty); + codeview::TypeIndex lowerTypePointer( + const DIDerivedType *Ty, + codeview::PointerOptions PO = codeview::PointerOptions::None); + codeview::TypeIndex lowerTypeMemberPointer( + const DIDerivedType *Ty, + codeview::PointerOptions PO = codeview::PointerOptions::None); codeview::TypeIndex lowerTypeModifier(const DIDerivedType *Ty); codeview::TypeIndex lowerTypeFunction(const DISubroutineType *Ty); codeview::TypeIndex lowerTypeVFTableShape(const DIDerivedType *Ty); @@ -327,21 +379,21 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { unsigned getPointerSizeInBytes(); protected: - /// \brief Gather pre-function debug information. + /// Gather pre-function debug information. void beginFunctionImpl(const MachineFunction *MF) override; - /// \brief Gather post-function debug information. + /// Gather post-function debug information. void endFunctionImpl(const MachineFunction *) override; public: - CodeViewDebug(AsmPrinter *Asm); + CodeViewDebug(AsmPrinter *AP); void setSymbolSize(const MCSymbol *, uint64_t) override {} - /// \brief Emit the COFF section that holds the line table information. + /// Emit the COFF section that holds the line table information. void endModule() override; - /// \brief Process beginning of an instruction. + /// Process beginning of an instruction. void beginInstruction(const MachineInstr *MI) override; }; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index b3148db30cd6..570424a79c81 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -17,6 +17,7 @@ #include "DwarfUnit.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -86,8 +87,9 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const { // easily, which helps track down where it came from. 
if (!dwarf::isValidFormForVersion(AttrData.getForm(), AP->getDwarfVersion())) { - DEBUG(dbgs() << "Invalid form " << format("0x%x", AttrData.getForm()) - << " for DWARF version " << AP->getDwarfVersion() << "\n"); + LLVM_DEBUG(dbgs() << "Invalid form " << format("0x%x", AttrData.getForm()) + << " for DWARF version " << AP->getDwarfVersion() + << "\n"); llvm_unreachable("Invalid form for specified DWARF version"); } #endif @@ -388,6 +390,7 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_data2: case dwarf::DW_FORM_strx2: case dwarf::DW_FORM_addrx2: + case dwarf::DW_FORM_strx3: case dwarf::DW_FORM_strp: case dwarf::DW_FORM_ref4: case dwarf::DW_FORM_data4: @@ -410,6 +413,7 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_GNU_str_index: case dwarf::DW_FORM_GNU_addr_index: case dwarf::DW_FORM_ref_udata: + case dwarf::DW_FORM_strx: case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return; @@ -423,58 +427,23 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { /// SizeOf - Determine size of integer value in bytes. /// unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + dwarf::FormParams Params = {0, 0, dwarf::DWARF32}; + if (AP) + Params = {AP->getDwarfVersion(), uint8_t(AP->getPointerSize()), + AP->OutStreamer->getContext().getDwarfFormat()}; + + if (Optional<uint8_t> FixedSize = dwarf::getFixedFormByteSize(Form, Params)) + return *FixedSize; + switch (Form) { - case dwarf::DW_FORM_implicit_const: - case dwarf::DW_FORM_flag_present: - return 0; - case dwarf::DW_FORM_flag: - case dwarf::DW_FORM_ref1: - case dwarf::DW_FORM_data1: - case dwarf::DW_FORM_strx1: - case dwarf::DW_FORM_addrx1: - return sizeof(int8_t); - case dwarf::DW_FORM_ref2: - case dwarf::DW_FORM_data2: - case dwarf::DW_FORM_strx2: - case dwarf::DW_FORM_addrx2: - return sizeof(int16_t); - case dwarf::DW_FORM_ref4: - case dwarf::DW_FORM_data4: - case dwarf::DW_FORM_ref_sup4: - case dwarf::DW_FORM_strx4: - case dwarf::DW_FORM_addrx4: - return sizeof(int32_t); - case dwarf::DW_FORM_ref8: - case dwarf::DW_FORM_ref_sig8: - case dwarf::DW_FORM_data8: - case dwarf::DW_FORM_ref_sup8: - return sizeof(int64_t); - case dwarf::DW_FORM_ref_addr: - if (AP->getDwarfVersion() == 2) - return AP->getPointerSize(); - LLVM_FALLTHROUGH; - case dwarf::DW_FORM_strp: - case dwarf::DW_FORM_GNU_ref_alt: - case dwarf::DW_FORM_GNU_strp_alt: - case dwarf::DW_FORM_line_strp: - case dwarf::DW_FORM_sec_offset: - case dwarf::DW_FORM_strp_sup: - switch (AP->OutStreamer->getContext().getDwarfFormat()) { - case dwarf::DWARF32: - return 4; - case dwarf::DWARF64: - return 8; - } - llvm_unreachable("Invalid DWARF format"); case dwarf::DW_FORM_GNU_str_index: case dwarf::DW_FORM_GNU_addr_index: case dwarf::DW_FORM_ref_udata: + case dwarf::DW_FORM_strx: case dwarf::DW_FORM_udata: return getULEB128Size(Integer); case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer); - case dwarf::DW_FORM_addr: - return AP->getPointerSize(); default: llvm_unreachable("DIE Value form not supported yet"); } } @@ -564,44 +533,46 @@ void DIEDelta::print(raw_ostream &O) const { /// EmitValue - Emit string value. /// void DIEString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { - assert( - (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) && - "Expected valid string form"); - // Index of string in symbol table. 
- if (Form == dwarf::DW_FORM_GNU_str_index) { + switch (Form) { + case dwarf::DW_FORM_GNU_str_index: + case dwarf::DW_FORM_strx: + case dwarf::DW_FORM_strx1: + case dwarf::DW_FORM_strx2: + case dwarf::DW_FORM_strx3: + case dwarf::DW_FORM_strx4: DIEInteger(S.getIndex()).EmitValue(AP, Form); return; - } - - // Relocatable symbol. - assert(Form == dwarf::DW_FORM_strp); - if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) { - DIELabel(S.getSymbol()).EmitValue(AP, Form); + case dwarf::DW_FORM_strp: + if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) + DIELabel(S.getSymbol()).EmitValue(AP, Form); + else + DIEInteger(S.getOffset()).EmitValue(AP, Form); return; + default: + llvm_unreachable("Expected valid string form"); } - - // Offset into symbol table. - DIEInteger(S.getOffset()).EmitValue(AP, Form); } /// SizeOf - Determine size of delta value in bytes. /// unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { - assert( - (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) && - "Expected valid string form"); - // Index of string in symbol table. - if (Form == dwarf::DW_FORM_GNU_str_index) + switch (Form) { + case dwarf::DW_FORM_GNU_str_index: + case dwarf::DW_FORM_strx: + case dwarf::DW_FORM_strx1: + case dwarf::DW_FORM_strx2: + case dwarf::DW_FORM_strx3: + case dwarf::DW_FORM_strx4: return DIEInteger(S.getIndex()).SizeOf(AP, Form); - - // Relocatable symbol. - if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) - return DIELabel(S.getSymbol()).SizeOf(AP, Form); - - // Offset into symbol table. - return DIEInteger(S.getOffset()).SizeOf(AP, Form); + case dwarf::DW_FORM_strp: + if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) + return DIELabel(S.getSymbol()).SizeOf(AP, Form); + return DIEInteger(S.getOffset()).SizeOf(AP, Form); + default: + llvm_unreachable("Expected valid string form"); + } } LLVM_DUMP_METHOD @@ -615,8 +586,8 @@ void DIEString::print(raw_ostream &O) const { void DIEInlineString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_string) { for (char ch : S) - AP->EmitInt8(ch); - AP->EmitInt8(0); + AP->emitInt8(ch); + AP->emitInt8(0); return; } llvm_unreachable("Expected valid string form"); @@ -722,9 +693,9 @@ unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const { void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { switch (Form) { default: llvm_unreachable("Improper form for block"); - case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; - case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break; - case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break; + case dwarf::DW_FORM_block1: Asm->emitInt8(Size); break; + case dwarf::DW_FORM_block2: Asm->emitInt16(Size); break; + case dwarf::DW_FORM_block4: Asm->emitInt32(Size); break; case dwarf::DW_FORM_block: case dwarf::DW_FORM_exprloc: Asm->EmitULEB128(Size); break; @@ -773,10 +744,11 @@ unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const { void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { switch (Form) { default: llvm_unreachable("Improper form for block"); - case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; - case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break; - case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break; + case dwarf::DW_FORM_block1: Asm->emitInt8(Size); break; + case dwarf::DW_FORM_block2: Asm->emitInt16(Size); break; + case dwarf::DW_FORM_block4: Asm->emitInt32(Size); break; case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break; + case dwarf::DW_FORM_string: 
break; case dwarf::DW_FORM_data16: break; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 15ade3c96dfe..b8f1202494d7 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -28,7 +28,7 @@ using namespace llvm; #define DEBUG_TYPE "dwarfdebug" -/// \brief Grabs the string in whichever attribute is passed in and returns +/// Grabs the string in whichever attribute is passed in and returns /// a reference to it. static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) { // Iterate through all the attributes until we find the one we're @@ -40,10 +40,10 @@ static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) { return StringRef(""); } -/// \brief Adds the string in \p Str to the hash. This also hashes +/// Adds the string in \p Str to the hash. This also hashes /// a trailing NULL with the string. void DIEHash::addString(StringRef Str) { - DEBUG(dbgs() << "Adding string " << Str << " to hash.\n"); + LLVM_DEBUG(dbgs() << "Adding string " << Str << " to hash.\n"); Hash.update(Str); Hash.update(makeArrayRef((uint8_t)'\0')); } @@ -51,9 +51,9 @@ void DIEHash::addString(StringRef Str) { // FIXME: The LEB128 routines are copied and only slightly modified out of // LEB128.h. -/// \brief Adds the unsigned in \p Value to the hash encoded as a ULEB128. +/// Adds the unsigned in \p Value to the hash encoded as a ULEB128. void DIEHash::addULEB128(uint64_t Value) { - DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n"); + LLVM_DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n"); do { uint8_t Byte = Value & 0x7f; Value >>= 7; @@ -64,7 +64,7 @@ void DIEHash::addULEB128(uint64_t Value) { } void DIEHash::addSLEB128(int64_t Value) { - DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n"); + LLVM_DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n"); bool More; do { uint8_t Byte = Value & 0x7f; @@ -77,10 +77,10 @@ void DIEHash::addSLEB128(int64_t Value) { } while (More); } -/// \brief Including \p Parent adds the context of Parent to the hash.. +/// Including \p Parent adds the context of Parent to the hash.. void DIEHash::addParentContext(const DIE &Parent) { - DEBUG(dbgs() << "Adding parent context to hash...\n"); + LLVM_DEBUG(dbgs() << "Adding parent context to hash...\n"); // [7.27.2] For each surrounding type or namespace beginning with the // outermost such construct... @@ -108,7 +108,7 @@ void DIEHash::addParentContext(const DIE &Parent) { // ... Then the name, taken from the DW_AT_name attribute. StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name); - DEBUG(dbgs() << "... adding context: " << Name << "\n"); + LLVM_DEBUG(dbgs() << "... 
adding context: " << Name << "\n"); if (!Name.empty()) addString(Name); } @@ -118,9 +118,9 @@ void DIEHash::addParentContext(const DIE &Parent) { void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) { for (const auto &V : Die.values()) { - DEBUG(dbgs() << "Attribute: " - << dwarf::AttributeString(V.getAttribute()) - << " added.\n"); + LLVM_DEBUG(dbgs() << "Attribute: " + << dwarf::AttributeString(V.getAttribute()) + << " added.\n"); switch (V.getAttribute()) { #define HANDLE_DIE_HASH_ATTR(NAME) \ case dwarf::NAME: \ diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h index 29337ae38a99..dae517ab2c29 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h @@ -23,7 +23,7 @@ namespace llvm { class AsmPrinter; class CompileUnit; -/// \brief An object containing the capability of hashing and adding hash +/// An object containing the capability of hashing and adding hash /// attributes onto a DIE. class DIEHash { // Collection of all attributes used in hashing a particular DIE. @@ -35,66 +35,66 @@ class DIEHash { public: DIEHash(AsmPrinter *A = nullptr) : AP(A) {} - /// \brief Computes the CU signature. + /// Computes the CU signature. uint64_t computeCUSignature(StringRef DWOName, const DIE &Die); - /// \brief Computes the type signature. + /// Computes the type signature. uint64_t computeTypeSignature(const DIE &Die); // Helper routines to process parts of a DIE. private: - /// \brief Adds the parent context of \param Die to the hash. - void addParentContext(const DIE &Die); + /// Adds the parent context of \param Parent to the hash. + void addParentContext(const DIE &Parent); - /// \brief Adds the attributes of \param Die to the hash. + /// Adds the attributes of \param Die to the hash. void addAttributes(const DIE &Die); - /// \brief Computes the full DWARF4 7.27 hash of the DIE. + /// Computes the full DWARF4 7.27 hash of the DIE. void computeHash(const DIE &Die); // Routines that add DIEValues to the hash. public: - /// \brief Adds \param Value to the hash. + /// Adds \param Value to the hash. void update(uint8_t Value) { Hash.update(Value); } - /// \brief Encodes and adds \param Value to the hash as a ULEB128. + /// Encodes and adds \param Value to the hash as a ULEB128. void addULEB128(uint64_t Value); - /// \brief Encodes and adds \param Value to the hash as a SLEB128. + /// Encodes and adds \param Value to the hash as a SLEB128. void addSLEB128(int64_t Value); private: - /// \brief Adds \param Str to the hash and includes a NULL byte. + /// Adds \param Str to the hash and includes a NULL byte. void addString(StringRef Str); - /// \brief Collects the attributes of DIE \param Die into the \param Attrs + /// Collects the attributes of DIE \param Die into the \param Attrs /// structure. void collectAttributes(const DIE &Die, DIEAttrs &Attrs); - /// \brief Hashes the attributes in \param Attrs in order. + /// Hashes the attributes in \param Attrs in order. void hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag); - /// \brief Hashes the data in a block like DIEValue, e.g. DW_FORM_block or + /// Hashes the data in a block like DIEValue, e.g. DW_FORM_block or /// DW_FORM_exprloc. void hashBlockData(const DIE::const_value_range &Values); - /// \brief Hashes the contents pointed to in the .debug_loc section. + /// Hashes the contents pointed to in the .debug_loc section. void hashLocList(const DIELocList &LocList); - /// \brief Hashes an individual attribute. 
+ /// Hashes an individual attribute. void hashAttribute(const DIEValue &Value, dwarf::Tag Tag); - /// \brief Hashes an attribute that refers to another DIE. + /// Hashes an attribute that refers to another DIE. void hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, const DIE &Entry); - /// \brief Hashes a reference to a named type in such a way that is + /// Hashes a reference to a named type in such a way that is /// independent of whether that type is described by a declaration or a /// definition. void hashShallowTypeReference(dwarf::Attribute Attribute, const DIE &Entry, StringRef Name); - /// \brief Hashes a reference to a previously referenced type DIE. + /// Hashes a reference to a previously referenced type DIE. void hashRepeatedTypeReference(dwarf::Attribute Attribute, unsigned DieNumber); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index 856758c8e4f6..25518a339c61 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -31,7 +31,7 @@ using namespace llvm; #define DEBUG_TYPE "dwarfdebug" -// \brief If @MI is a DBG_VALUE with debug value described by a +// If @MI is a DBG_VALUE with debug value described by a // defined register, returns the number of this register. // In the other case, returns 0. static unsigned isDescribedByReg(const MachineInstr &MI) { @@ -50,8 +50,8 @@ void DbgValueHistoryMap::startInstrRange(InlinedVariable Var, auto &Ranges = VarInstrRanges[Var]; if (!Ranges.empty() && Ranges.back().second == nullptr && Ranges.back().first->isIdenticalTo(MI)) { - DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" - << "\t" << Ranges.back().first << "\t" << MI << "\n"); + LLVM_DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" + << "\t" << Ranges.back().first << "\t" << MI << "\n"); return; } Ranges.push_back(std::make_pair(&MI, nullptr)); @@ -86,7 +86,7 @@ using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedVariable, 1>>; } // end anonymous namespace -// \brief Claim that @Var is not described by @RegNo anymore. +// Claim that @Var is not described by @RegNo anymore. static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, InlinedVariable Var) { const auto &I = RegVars.find(RegNo); @@ -100,7 +100,7 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, RegVars.erase(I); } -// \brief Claim that @Var is now described by @RegNo. +// Claim that @Var is now described by @RegNo. static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, InlinedVariable Var) { assert(RegNo != 0U); @@ -109,7 +109,7 @@ static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, VarSet.push_back(Var); } -// \brief Terminate the location range for variables described by register at +// Terminate the location range for variables described by register at // @I by inserting @ClobberingInstr to their history. static void clobberRegisterUses(RegDescribedVarsMap &RegVars, RegDescribedVarsMap::iterator I, @@ -122,7 +122,7 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, RegVars.erase(I); } -// \brief Terminate the location range for variables described by register +// Terminate the location range for variables described by register // @RegNo by inserting @ClobberingInstr to their history. 
static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo, DbgValueHistoryMap &HistMap, @@ -133,7 +133,7 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo, clobberRegisterUses(RegVars, I, HistMap, ClobberingInstr); } -// \brief Returns the first instruction in @MBB which corresponds to +// Returns the first instruction in @MBB which corresponds to // the function epilogue, or nullptr if @MBB doesn't contain an epilogue. static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) { auto LastMI = MBB.getLastNonDebugInstr(); @@ -155,7 +155,7 @@ static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) { return &*MBB.begin(); } -// \brief Collect registers that are modified in the function body (their +// Collect registers that are modified in the function body (their // contents is changed outside of the prologue and epilogue). static void collectChangingRegs(const MachineFunction *MF, const TargetRegisterInfo *TRI, @@ -198,7 +198,7 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, RegDescribedVarsMap RegVars; for (const auto &MBB : *MF) { for (const auto &MI : MBB) { - if (!MI.isDebugValue()) { + if (!MI.isDebugInstr()) { // Not a DBG_VALUE instruction. It may clobber registers which describe // some variables. for (const MachineOperand &MO : MI.operands()) { @@ -234,6 +234,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, continue; } + // Skip DBG_LABEL instructions. + if (MI.isDebugLabel()) + continue; + assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!"); // Use the base variable (without any DW_OP_piece expressions) // as index into History. The full variables including the @@ -265,3 +269,33 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, } } } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const { + dbgs() << "DbgValueHistoryMap:\n"; + for (const auto &VarRangePair : *this) { + const InlinedVariable &Var = VarRangePair.first; + const InstrRanges &Ranges = VarRangePair.second; + + const DILocalVariable *LocalVar = Var.first; + const DILocation *Location = Var.second; + + dbgs() << " - " << LocalVar->getName() << " at "; + + if (Location) + dbgs() << Location->getFilename() << ":" << Location->getLine() << ":" + << Location->getColumn(); + else + dbgs() << "<unknown location>"; + + dbgs() << " --\n"; + + for (const InstrRange &Range : Ranges) { + dbgs() << " Begin: " << *Range.first; + if (Range.second) + dbgs() << " End : " << *Range.second; + dbgs() << "\n"; + } + } +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h index a7b0562e8102..a262cb38b175 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h @@ -52,6 +52,10 @@ public: void clear() { VarInstrRanges.clear(); } InstrRangesMap::const_iterator begin() const { return VarInstrRanges.begin(); } InstrRangesMap::const_iterator end() const { return VarInstrRanges.end(); } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void dump() const; +#endif }; void calculateDbgValueHistory(const MachineFunction *MF, diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 2e5c22447936..82e14dc13cb1 100644 --- 
a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -25,6 +25,8 @@ using namespace llvm; +#define DEBUG_TYPE "dwarfdebug" + Optional<DbgVariableLocation> DbgVariableLocation::extractFromMachineInstruction( const MachineInstr &Instruction) { @@ -123,29 +125,6 @@ MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) { return LabelsAfterInsn.lookup(MI); } -int DebugHandlerBase::fragmentCmp(const DIExpression *P1, - const DIExpression *P2) { - auto Fragment1 = *P1->getFragmentInfo(); - auto Fragment2 = *P2->getFragmentInfo(); - unsigned l1 = Fragment1.OffsetInBits; - unsigned l2 = Fragment2.OffsetInBits; - unsigned r1 = l1 + Fragment1.SizeInBits; - unsigned r2 = l2 + Fragment2.SizeInBits; - if (r1 <= l2) - return -1; - else if (r2 <= l1) - return 1; - else - return 0; -} - -bool DebugHandlerBase::fragmentsOverlap(const DIExpression *P1, - const DIExpression *P2) { - if (!P1->isFragment() || !P2->isFragment()) - return true; - return fragmentCmp(P1, P2) == 0; -} - /// If this type is derived from a base type then return base type size. uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) { DIType *Ty = TyRef.resolve(); @@ -213,6 +192,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) { assert(DbgValues.empty() && "DbgValues map wasn't cleaned!"); calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(), DbgValues); + LLVM_DEBUG(DbgValues.dump()); // Request labels for the full history. for (const auto &I : DbgValues) { @@ -232,8 +212,8 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) { const DIExpression *Fragment = I->first->getDebugExpression(); if (std::all_of(Ranges.begin(), I, [&](DbgValueHistoryMap::InstrRange Pred) { - return !fragmentsOverlap( - Fragment, Pred.first->getDebugExpression()); + return !Fragment->fragmentsOverlap( + Pred.first->getDebugExpression()); })) LabelsBeforeInsn[I->first] = Asm->getFunctionBegin(); else diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h index 245d70038de9..1ccefe32be75 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h @@ -122,14 +122,6 @@ public: /// Return Label immediately following the instruction. MCSymbol *getLabelAfterInsn(const MachineInstr *MI); - /// Determine the relative position of the fragments described by P1 and P2. - /// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap, 1 if P1 is - /// entirely after P2. - static int fragmentCmp(const DIExpression *P1, const DIExpression *P2); - - /// Determine whether two variable fragments overlap. - static bool fragmentsOverlap(const DIExpression *P1, const DIExpression *P2); - /// If this type is derived from a base type then return base type size. 
static uint64_t getBaseTypeSize(const DITypeRef TyRef); }; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h index 3d6d8a76529c..ac49657b68fa 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -11,6 +11,7 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H #include "DebugLocStream.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/MC/MCSymbol.h" @@ -20,7 +21,7 @@ namespace llvm { class AsmPrinter; -/// \brief This struct describes location entries emitted in the .debug_loc +/// This struct describes location entries emitted in the .debug_loc /// section. class DebugLocEntry { /// Begin and end symbols for the address range that this location is valid. @@ -28,7 +29,7 @@ class DebugLocEntry { const MCSymbol *End; public: - /// \brief A single location or constant. + /// A single location or constant. struct Value { Value(const DIExpression *Expr, int64_t i) : Expression(Expr), EntryKind(E_Integer) { @@ -105,13 +106,13 @@ public: Values.push_back(std::move(Val)); } - /// \brief If this and Next are describing different pieces of the same + /// If this and Next are describing different pieces of the same /// variable, merge them by appending Next's values to the current /// list of values. /// Return true if the merge was successful. bool MergeValues(const DebugLocEntry &Next); - /// \brief Attempt to merge this DebugLocEntry with Next and return + /// Attempt to merge this DebugLocEntry with Next and return /// true if the merge was successful. Entries can be merged if they /// share the same Loc/Constant and if Next immediately follows this /// Entry. @@ -135,10 +136,10 @@ public: }) && "value must be a piece"); } - // \brief Sort the pieces by offset. + // Sort the pieces by offset. // Remove any duplicate entries by dropping all but the first. void sortUniqueValues() { - std::sort(Values.begin(), Values.end()); + llvm::sort(Values.begin(), Values.end()); Values.erase( std::unique( Values.begin(), Values.end(), [](const Value &A, const Value &B) { @@ -147,12 +148,12 @@ public: Values.end()); } - /// \brief Lower this entry into a DWARF expression. + /// Lower this entry into a DWARF expression. void finalize(const AsmPrinter &AP, DebugLocStream::ListBuilder &List, const DIBasicType *BT); }; -/// \brief Compare two Values for equality. +/// Compare two Values for equality. inline bool operator==(const DebugLocEntry::Value &A, const DebugLocEntry::Value &B) { if (A.EntryKind != B.EntryKind) diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h index 0c551dfff9cc..8dcf5cbc1889 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h @@ -22,7 +22,7 @@ class DwarfCompileUnit; class MachineInstr; class MCSymbol; -/// \brief Byte stream of .debug_loc entries. +/// Byte stream of .debug_loc entries. /// /// Stores a unified stream of .debug_loc entries. There's \a List for each /// variable/inlined-at pair, and an \a Entry for each \a DebugLocEntry. @@ -55,7 +55,7 @@ private: SmallString<256> DWARFBytes; SmallVector<std::string, 32> Comments; - /// \brief Only verbose textual output needs comments. This will be set to + /// Only verbose textual output needs comments. This will be set to /// true for that case, and false otherwise. 
bool GenerateComments; @@ -69,7 +69,7 @@ public: class EntryBuilder; private: - /// \brief Start a new .debug_loc entry list. + /// Start a new .debug_loc entry list. /// /// Start a new .debug_loc entry list. Return the new list's index so it can /// be retrieved later via \a getList(). @@ -89,7 +89,7 @@ private: /// \return false iff the list is deleted. bool finalizeList(AsmPrinter &Asm); - /// \brief Start a new .debug_loc entry. + /// Start a new .debug_loc entry. /// /// Until the next call, bytes added to the stream will be added to this /// entry. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp deleted file mode 100644 index c21b3d3451ad..000000000000 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ /dev/null @@ -1,293 +0,0 @@ -//===- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains support for writing dwarf accelerator tables. -// -//===----------------------------------------------------------------------===// - -#include "DwarfAccelTable.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/DIE.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <cassert> -#include <cstddef> -#include <cstdint> -#include <iterator> -#include <limits> -#include <vector> - -using namespace llvm; - -// The length of the header data is always going to be 4 + 4 + 4*NumAtoms. -DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) - : Header(8 + (atomList.size() * 4)), HeaderData(atomList), - Entries(Allocator) {} - -void DwarfAccelTable::AddName(DwarfStringPoolEntryRef Name, const DIE *die, - char Flags) { - assert(Data.empty() && "Already finalized!"); - // If the string is in the list already then add this die to the list - // otherwise add a new one. - DataArray &DIEs = Entries[Name.getString()]; - assert(!DIEs.Name || DIEs.Name == Name); - DIEs.Name = Name; - DIEs.Values.push_back(new (Allocator) HashDataContents(die, Flags)); -} - -void DwarfAccelTable::ComputeBucketCount() { - // First get the number of unique hashes. - std::vector<uint32_t> uniques(Data.size()); - for (size_t i = 0, e = Data.size(); i < e; ++i) - uniques[i] = Data[i]->HashValue; - array_pod_sort(uniques.begin(), uniques.end()); - std::vector<uint32_t>::iterator p = - std::unique(uniques.begin(), uniques.end()); - uint32_t num = std::distance(uniques.begin(), p); - - // Then compute the bucket size, minimum of 1 bucket. - if (num > 1024) - Header.bucket_count = num / 4; - else if (num > 16) - Header.bucket_count = num / 2; - else - Header.bucket_count = num > 0 ? num : 1; - - Header.hashes_count = num; -} - -// compareDIEs - comparison predicate that sorts DIEs by their offset. -static bool compareDIEs(const DwarfAccelTable::HashDataContents *A, - const DwarfAccelTable::HashDataContents *B) { - return A->Die->getOffset() < B->Die->getOffset(); -} - -void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) { - // Create the individual hash data outputs. 
- Data.reserve(Entries.size()); - for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end(); - EI != EE; ++EI) { - - // Unique the entries. - std::stable_sort(EI->second.Values.begin(), EI->second.Values.end(), compareDIEs); - EI->second.Values.erase( - std::unique(EI->second.Values.begin(), EI->second.Values.end()), - EI->second.Values.end()); - - HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second); - Data.push_back(Entry); - } - - // Figure out how many buckets we need, then compute the bucket - // contents and the final ordering. We'll emit the hashes and offsets - // by doing a walk during the emission phase. We add temporary - // symbols to the data so that we can reference them during the offset - // later, we'll emit them when we emit the data. - ComputeBucketCount(); - - // Compute bucket contents and final ordering. - Buckets.resize(Header.bucket_count); - for (size_t i = 0, e = Data.size(); i < e; ++i) { - uint32_t bucket = Data[i]->HashValue % Header.bucket_count; - Buckets[bucket].push_back(Data[i]); - Data[i]->Sym = Asm->createTempSymbol(Prefix); - } - - // Sort the contents of the buckets by hash value so that hash - // collisions end up together. Stable sort makes testing easier and - // doesn't cost much more. - for (size_t i = 0; i < Buckets.size(); ++i) - std::stable_sort(Buckets[i].begin(), Buckets[i].end(), - [] (HashData *LHS, HashData *RHS) { - return LHS->HashValue < RHS->HashValue; - }); -} - -// Emits the header for the table via the AsmPrinter. -void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { - Asm->OutStreamer->AddComment("Header Magic"); - Asm->EmitInt32(Header.magic); - Asm->OutStreamer->AddComment("Header Version"); - Asm->EmitInt16(Header.version); - Asm->OutStreamer->AddComment("Header Hash Function"); - Asm->EmitInt16(Header.hash_function); - Asm->OutStreamer->AddComment("Header Bucket Count"); - Asm->EmitInt32(Header.bucket_count); - Asm->OutStreamer->AddComment("Header Hash Count"); - Asm->EmitInt32(Header.hashes_count); - Asm->OutStreamer->AddComment("Header Data Length"); - Asm->EmitInt32(Header.header_data_len); - Asm->OutStreamer->AddComment("HeaderData Die Offset Base"); - Asm->EmitInt32(HeaderData.die_offset_base); - Asm->OutStreamer->AddComment("HeaderData Atom Count"); - Asm->EmitInt32(HeaderData.Atoms.size()); - for (size_t i = 0; i < HeaderData.Atoms.size(); i++) { - Atom A = HeaderData.Atoms[i]; - Asm->OutStreamer->AddComment(dwarf::AtomTypeString(A.type)); - Asm->EmitInt16(A.type); - Asm->OutStreamer->AddComment(dwarf::FormEncodingString(A.form)); - Asm->EmitInt16(A.form); - } -} - -// Walk through and emit the buckets for the table. Each index is -// an offset into the list of hashes. -void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { - unsigned index = 0; - for (size_t i = 0, e = Buckets.size(); i < e; ++i) { - Asm->OutStreamer->AddComment("Bucket " + Twine(i)); - if (!Buckets[i].empty()) - Asm->EmitInt32(index); - else - Asm->EmitInt32(std::numeric_limits<uint32_t>::max()); - // Buckets point in the list of hashes, not to the data. Do not - // increment the index multiple times in case of hash collisions. - uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); - for (auto *HD : Buckets[i]) { - uint32_t HashValue = HD->HashValue; - if (PrevHash != HashValue) - ++index; - PrevHash = HashValue; - } - } -} - -// Walk through the buckets and emit the individual hashes for each -// bucket. 
-void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { - uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); - for (size_t i = 0, e = Buckets.size(); i < e; ++i) { - for (HashList::const_iterator HI = Buckets[i].begin(), - HE = Buckets[i].end(); - HI != HE; ++HI) { - uint32_t HashValue = (*HI)->HashValue; - if (PrevHash == HashValue) - continue; - Asm->OutStreamer->AddComment("Hash in Bucket " + Twine(i)); - Asm->EmitInt32(HashValue); - PrevHash = HashValue; - } - } -} - -// Walk through the buckets and emit the individual offsets for each -// element in each bucket. This is done via a symbol subtraction from the -// beginning of the section. The non-section symbol will be output later -// when we emit the actual data. -void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) { - uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); - for (size_t i = 0, e = Buckets.size(); i < e; ++i) { - for (HashList::const_iterator HI = Buckets[i].begin(), - HE = Buckets[i].end(); - HI != HE; ++HI) { - uint32_t HashValue = (*HI)->HashValue; - if (PrevHash == HashValue) - continue; - PrevHash = HashValue; - Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i)); - MCContext &Context = Asm->OutStreamer->getContext(); - const MCExpr *Sub = MCBinaryExpr::createSub( - MCSymbolRefExpr::create((*HI)->Sym, Context), - MCSymbolRefExpr::create(SecBegin, Context), Context); - Asm->OutStreamer->EmitValue(Sub, sizeof(uint32_t)); - } - } -} - -// Walk through the buckets and emit the full data for each element in -// the bucket. For the string case emit the dies and the various offsets. -// Terminate each HashData bucket with 0. -void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { - for (size_t i = 0, e = Buckets.size(); i < e; ++i) { - uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); - for (HashList::const_iterator HI = Buckets[i].begin(), - HE = Buckets[i].end(); - HI != HE; ++HI) { - // Terminate the previous entry if there is no hash collision - // with the current one. - if (PrevHash != std::numeric_limits<uint64_t>::max() && - PrevHash != (*HI)->HashValue) - Asm->EmitInt32(0); - // Remember to emit the label for our offset. - Asm->OutStreamer->EmitLabel((*HI)->Sym); - Asm->OutStreamer->AddComment((*HI)->Str); - Asm->emitDwarfStringOffset((*HI)->Data.Name); - Asm->OutStreamer->AddComment("Num DIEs"); - Asm->EmitInt32((*HI)->Data.Values.size()); - for (HashDataContents *HD : (*HI)->Data.Values) { - // Emit the DIE offset - Asm->EmitInt32(HD->Die->getDebugSectionOffset()); - // If we have multiple Atoms emit that info too. - // FIXME: A bit of a hack, we either emit only one atom or all info. - if (HeaderData.Atoms.size() > 1) { - Asm->EmitInt16(HD->Die->getTag()); - Asm->EmitInt8(HD->Flags); - } - } - PrevHash = (*HI)->HashValue; - } - // Emit the final end marker for the bucket. - if (!Buckets[i].empty()) - Asm->EmitInt32(0); - } -} - -// Emit the entire data structure to the output file. -void DwarfAccelTable::emit(AsmPrinter *Asm, const MCSymbol *SecBegin, - DwarfDebug *D) { - // Emit the header. - EmitHeader(Asm); - - // Emit the buckets. - EmitBuckets(Asm); - - // Emit the hashes. - EmitHashes(Asm); - - // Emit the offsets. - emitOffsets(Asm, SecBegin); - - // Emit the hash data. 
- EmitData(Asm, D); -} - -#ifndef NDEBUG -void DwarfAccelTable::print(raw_ostream &OS) { - Header.print(OS); - HeaderData.print(OS); - - OS << "Entries: \n"; - for (StringMap<DataArray>::const_iterator EI = Entries.begin(), - EE = Entries.end(); - EI != EE; ++EI) { - OS << "Name: " << EI->getKeyData() << "\n"; - for (HashDataContents *HD : EI->second.Values) - HD->print(OS); - } - - OS << "Buckets and Hashes: \n"; - for (size_t i = 0, e = Buckets.size(); i < e; ++i) - for (HashList::const_iterator HI = Buckets[i].begin(), - HE = Buckets[i].end(); - HI != HE; ++HI) - (*HI)->print(OS); - - OS << "Data: \n"; - for (std::vector<HashData *>::const_iterator DI = Data.begin(), - DE = Data.end(); - DI != DE; ++DI) - (*DI)->print(OS); -} -#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h deleted file mode 100644 index f56199dc8e72..000000000000 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ /dev/null @@ -1,261 +0,0 @@ -//==- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables --*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains support for writing dwarf accelerator tables. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H -#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/CodeGen/DIE.h" -#include "llvm/CodeGen/DwarfStringPoolEntry.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -#include <cstddef> -#include <cstdint> -#include <vector> - -// The dwarf accelerator tables are an indirect hash table optimized -// for null lookup rather than access to known data. They are output into -// an on-disk format that looks like this: -// -// .-------------. -// | HEADER | -// |-------------| -// | BUCKETS | -// |-------------| -// | HASHES | -// |-------------| -// | OFFSETS | -// |-------------| -// | DATA | -// `-------------' -// -// where the header contains a magic number, version, type of hash function, -// the number of buckets, total number of hashes, and room for a special -// struct of data and the length of that struct. -// -// The buckets contain an index (e.g. 6) into the hashes array. The hashes -// section contains all of the 32-bit hash values in contiguous memory, and -// the offsets contain the offset into the data area for the particular -// hash. -// -// For a lookup example, we could hash a function name and take it modulo the -// number of buckets giving us our bucket. From there we take the bucket value -// as an index into the hashes table and look at each successive hash as long -// as the hash value is still the same modulo result (bucket value) as earlier. -// If we have a match we look at that same entry in the offsets table and -// grab the offset in the data for our final match. 
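// The lookup walk described in the header comment above can be sketched as a
// small stand-alone routine. This is only an illustration of the on-disk
// scheme the removed class emitted; the djbHash helper and the vector
// parameters below are stand-ins for the BUCKETS/HASHES/OFFSETS sections and
// are not part of the original interface.
#include <cstdint>
#include <string>
#include <vector>

// DJB hash, the hash function named by DW_hash_function_djb above.
static uint32_t djbHash(const std::string &S) {
  uint32_t H = 5381;
  for (unsigned char C : S)
    H = ((H << 5) + H) + C; // H * 33 + C
  return H;
}

// Returns the offset into the DATA area for Name, or UINT32_MAX if absent.
static uint32_t lookup(const std::string &Name,
                       const std::vector<uint32_t> &Buckets,   // indexes into Hashes
                       const std::vector<uint32_t> &Hashes,    // 32-bit hash values
                       const std::vector<uint32_t> &Offsets) { // offsets into DATA
  uint32_t Hash = djbHash(Name);
  uint32_t Bucket = Hash % Buckets.size();
  // Empty buckets are emitted as UINT32_MAX, which fails the bounds check.
  for (uint32_t I = Buckets[Bucket];
       I < Hashes.size() && Hashes[I] % Buckets.size() == Bucket; ++I)
    if (Hashes[I] == Hash)
      return Offsets[I]; // the HashData record for Name starts here
  return UINT32_MAX;
}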
- -namespace llvm { - -class AsmPrinter; -class DwarfDebug; - -class DwarfAccelTable { - // Helper function to compute the number of buckets needed based on - // the number of unique hashes. - void ComputeBucketCount(); - - struct TableHeader { - uint32_t magic = MagicHash; // 'HASH' magic value to allow endian detection - uint16_t version = 1; // Version number. - uint16_t hash_function = dwarf::DW_hash_function_djb; - // The hash function enumeration that was used. - uint32_t bucket_count = 0; // The number of buckets in this hash table. - uint32_t hashes_count = 0; // The total number of unique hash values - // and hash data offsets in this table. - uint32_t header_data_len; // The bytes to skip to get to the hash - // indexes (buckets) for correct alignment. - // Also written to disk is the implementation specific header data. - - static const uint32_t MagicHash = 0x48415348; - - TableHeader(uint32_t data_len) : header_data_len(data_len) {} - -#ifndef NDEBUG - void print(raw_ostream &OS) { - OS << "Magic: " << format("0x%x", magic) << "\n" - << "Version: " << version << "\n" - << "Hash Function: " << hash_function << "\n" - << "Bucket Count: " << bucket_count << "\n" - << "Header Data Length: " << header_data_len << "\n"; - } - - void dump() { print(dbgs()); } -#endif - }; - -public: - // The HeaderData describes the form of each set of data. In general this - // is as a list of atoms (atom_count) where each atom contains a type - // (AtomType type) of data, and an encoding form (form). In the case of - // data that is referenced via DW_FORM_ref_* the die_offset_base is - // used to describe the offset for all forms in the list of atoms. - // This also serves as a public interface of sorts. - // When written to disk this will have the form: - // - // uint32_t die_offset_base - // uint32_t atom_count - // atom_count Atoms - - // Make these public so that they can be used as a general interface to - // the class. - struct Atom { - uint16_t type; // enum AtomType - uint16_t form; // DWARF DW_FORM_ defines - - constexpr Atom(uint16_t type, uint16_t form) : type(type), form(form) {} - -#ifndef NDEBUG - void print(raw_ostream &OS) { - OS << "Type: " << dwarf::AtomTypeString(type) << "\n" - << "Form: " << dwarf::FormEncodingString(form) << "\n"; - } - - void dump() { print(dbgs()); } -#endif - }; - -private: - struct TableHeaderData { - uint32_t die_offset_base; - SmallVector<Atom, 3> Atoms; - - TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0) - : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {} - -#ifndef NDEBUG - void print(raw_ostream &OS) { - OS << "die_offset_base: " << die_offset_base << "\n"; - for (size_t i = 0; i < Atoms.size(); i++) - Atoms[i].print(OS); - } - - void dump() { print(dbgs()); } -#endif - }; - - // The data itself consists of a str_offset, a count of the DIEs in the - // hash and the offsets to the DIEs themselves. - // On disk each data section is ended with a 0 KeyType as the end of the - // hash chain. 
- // On output this looks like: - // uint32_t str_offset - // uint32_t hash_data_count - // HashData[hash_data_count] -public: - struct HashDataContents { - const DIE *Die; // Offsets - char Flags; // Specific flags to output - - HashDataContents(const DIE *D, char Flags) : Die(D), Flags(Flags) {} - -#ifndef NDEBUG - void print(raw_ostream &OS) const { - OS << " Offset: " << Die->getOffset() << "\n" - << " Tag: " << dwarf::TagString(Die->getTag()) << "\n" - << " Flags: " << Flags << "\n"; - } -#endif - }; - -private: - // String Data - struct DataArray { - DwarfStringPoolEntryRef Name; - std::vector<HashDataContents *> Values; - }; - - friend struct HashData; - - struct HashData { - StringRef Str; - uint32_t HashValue; - MCSymbol *Sym; - DwarfAccelTable::DataArray &Data; // offsets - - HashData(StringRef S, DwarfAccelTable::DataArray &Data) - : Str(S), Data(Data) { - HashValue = dwarf::djbHash(S); - } - -#ifndef NDEBUG - void print(raw_ostream &OS) { - OS << "Name: " << Str << "\n"; - OS << " Hash Value: " << format("0x%x", HashValue) << "\n"; - OS << " Symbol: "; - if (Sym) - OS << *Sym; - else - OS << "<none>"; - OS << "\n"; - for (HashDataContents *C : Data.Values) { - OS << " Offset: " << C->Die->getOffset() << "\n"; - OS << " Tag: " << dwarf::TagString(C->Die->getTag()) << "\n"; - OS << " Flags: " << C->Flags << "\n"; - } - } - - void dump() { print(dbgs()); } -#endif - }; - - // Internal Functions - void EmitHeader(AsmPrinter *); - void EmitBuckets(AsmPrinter *); - void EmitHashes(AsmPrinter *); - void emitOffsets(AsmPrinter *, const MCSymbol *); - void EmitData(AsmPrinter *, DwarfDebug *D); - - // Allocator for HashData and HashDataContents. - BumpPtrAllocator Allocator; - - // Output Variables - TableHeader Header; - TableHeaderData HeaderData; - std::vector<HashData *> Data; - - using StringEntries = StringMap<DataArray, BumpPtrAllocator &>; - - StringEntries Entries; - - // Buckets/Hashes/Offsets - using HashList = std::vector<HashData *>; - using BucketList = std::vector<HashList>; - BucketList Buckets; - HashList Hashes; - - // Public Implementation -public: - DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>); - DwarfAccelTable(const DwarfAccelTable &) = delete; - DwarfAccelTable &operator=(const DwarfAccelTable &) = delete; - - void AddName(DwarfStringPoolEntryRef Name, const DIE *Die, char Flags = 0); - void FinalizeTable(AsmPrinter *, StringRef); - void emit(AsmPrinter *, const MCSymbol *, DwarfDebug *); -#ifndef NDEBUG - void print(raw_ostream &OS); - void dump() { print(dbgs()); } -#endif -}; - -} // end namespace llvm - -#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index cbb4c48b4d88..1990456cc555 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" @@ -30,6 +29,7 @@ #include "llvm/MC/MachineLocation.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp 
b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index c8cd8eb8ffd3..32271a0ef24a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -28,7 +28,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetFrameLowering.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" @@ -40,6 +39,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/Casting.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include <algorithm> @@ -94,16 +94,18 @@ void DwarfCompileUnit::addLocalLabelAddress(DIE &Die, DIEInteger(0)); } -unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, - StringRef DirName) { +unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) { // If we print assembly, we can't separate .file entries according to // compile units. Thus all files will belong to the default compile unit. // FIXME: add a better feature test than hasRawTextSupport. Even better, // extend .file to support this. + unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID(); + if (!File) + return Asm->OutStreamer->EmitDwarfFileDirective(0, "", "", nullptr, None, CUID); return Asm->OutStreamer->EmitDwarfFileDirective( - 0, DirName, FileName, - Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID()); + 0, File->getDirectory(), File->getFilename(), getMD5AsBytes(File), + File->getSource(), CUID); } DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( @@ -190,10 +192,13 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc); } + if (Expr) + DwarfExpr->addFragmentOffset(Expr); + if (Global) { const MCSymbol *Sym = Asm->getSymbol(Global); if (Global->isThreadLocal()) { - if (Asm->TM.Options.EmulatedTLS) { + if (Asm->TM.useEmulatedTLS()) { // TODO: add debug info for emulated thread local mode. } else { // FIXME: Make this work with -gsplit-dwarf. @@ -225,10 +230,13 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( addOpAddress(*Loc, Sym); } } - if (Expr) { - DwarfExpr->addFragmentOffset(Expr); - DwarfExpr->addExpression(Expr); - } + // Global variables attached to symbols are memory locations. + // It would be better if this were unconditional, but malformed input that + // mixes non-fragments and fragments for the same variable is too expensive + // to detect in the verifier. + if (DwarfExpr->isUnknownLocation()) + DwarfExpr->setMemoryLocationKind(); + DwarfExpr->addExpression(Expr); } if (Loc) addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize()); @@ -241,7 +249,8 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( // If the linkage name is different than the name, go ahead and output // that as well into the name table. - if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName()) + if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName() && + DD->useAllLinkageNames()) DD->addAccelName(GV->getLinkageName(), *VariableDIE); } @@ -267,15 +276,20 @@ void DwarfCompileUnit::addRange(RangeSpan Range) { void DwarfCompileUnit::initStmtList() { // Define start line table label for each Compile Unit. 
- MCSymbol *LineTableStartSym = - Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID()); + MCSymbol *LineTableStartSym; + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + if (DD->useSectionsAsReferences()) { + LineTableStartSym = TLOF.getDwarfLineSection()->getBeginSymbol(); + } else { + LineTableStartSym = + Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID()); + } // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. For split dwarf this is // left in the skeleton CU and so not included. // The line table entries are not always emitted in assembly, so it // is not okay to use line_table_start here. - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); StmtListValue = addSectionLabel(getUnitDie(), dwarf::DW_AT_stmt_list, LineTableStartSym, TLOF.getDwarfLineSection()->getBeginSymbol()); @@ -313,10 +327,16 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { // Only include DW_AT_frame_base in full debug info if (!includeMinimalInlineScopes()) { - const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo(); - MachineLocation Location(RI->getFrameRegister(*Asm->MF)); - if (RI->isPhysicalRegister(Location.getReg())) - addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); + if (Asm->MF->getTarget().getTargetTriple().isNVPTX()) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_call_frame_cfa); + addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc); + } else { + const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo(); + MachineLocation Location(RI->getFrameRegister(*Asm->MF)); + if (RI->isPhysicalRegister(Location.getReg())) + addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); + } } // Add name to the name table, we do this here because we're guaranteed @@ -385,21 +405,28 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - // Emit offset in .debug_range as a relocatable label. emitDIE will handle - // emitting it appropriately. + // Emit the offset into .debug_ranges or .debug_rnglists as a relocatable + // label. emitDIE() will handle emitting it appropriately. const MCSymbol *RangeSectionSym = - TLOF.getDwarfRangesSection()->getBeginSymbol(); + DD->getDwarfVersion() >= 5 + ? TLOF.getDwarfRnglistsSection()->getBeginSymbol() + : TLOF.getDwarfRangesSection()->getBeginSymbol(); RangeSpanList List(Asm->createTempSymbol("debug_ranges"), std::move(Range)); // Under fission, ranges are specified by constant offsets relative to the // CU's DW_AT_GNU_ranges_base. - if (isDwoUnit()) - addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), - RangeSectionSym); - else + // FIXME: For DWARF v5, do not generate the DW_AT_ranges attribute under + // fission until we support the forms using the .debug_addr section + // (DW_RLE_startx_endx etc.). + if (isDwoUnit()) { + if (DD->getDwarfVersion() < 5) + addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + RangeSectionSym); + } else { addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), RangeSectionSym); + } // Add the range list to the set of ranges to be emitted. (Skeleton ? 
Skeleton : this)->CURangeLists.push_back(std::move(List)); @@ -407,9 +434,10 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, void DwarfCompileUnit::attachRangesOrLowHighPC( DIE &Die, SmallVector<RangeSpan, 2> Ranges) { - if (Ranges.size() == 1) { - const auto &single = Ranges.front(); - attachLowHighPC(Die, single.getStart(), single.getEnd()); + if (Ranges.size() == 1 || !DD->useRangesSection()) { + const RangeSpan &Front = Ranges.front(); + const RangeSpan &Back = Ranges.back(); + attachLowHighPC(Die, Front.getStart(), Back.getEnd()); } else addScopeRangeList(Die, std::move(Ranges)); } @@ -443,7 +471,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { // Add the call site information to the DIE. const DILocation *IA = Scope->getInlinedAt(); addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, - getOrCreateSourceID(IA->getFilename(), IA->getDirectory())); + getOrCreateSourceID(IA->getFile())); addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine()); if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4) addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None, @@ -482,6 +510,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, bool Abstract) { // Define variable debug information entry. auto VariableDie = DIE::get(DIEValueAllocator, DV.getTag()); + insertDIE(DV.getVariable(), VariableDie); if (Abstract) { applyVariableAttributes(DV, *VariableDie); @@ -547,8 +576,11 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, Ops.append(Expr->elements_begin(), Expr->elements_end()); DIExpressionCursor Cursor(Ops); DwarfExpr.setMemoryLocationKind(); - DwarfExpr.addMachineRegExpression( - *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg); + if (const MCSymbol *FrameSymbol = Asm->getFunctionFrameSymbol()) + addOpAddress(*Loc, FrameSymbol); + else + DwarfExpr.addMachineRegExpression( + *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg); DwarfExpr.addExpression(std::move(Cursor)); } addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); @@ -565,13 +597,95 @@ DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, return Var; } +/// Return all DIVariables that appear in count: expressions. +static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) { + SmallVector<const DIVariable *, 2> Result; + auto *Array = dyn_cast<DICompositeType>(Var->getType()); + if (!Array || Array->getTag() != dwarf::DW_TAG_array_type) + return Result; + for (auto *El : Array->getElements()) { + if (auto *Subrange = dyn_cast<DISubrange>(El)) { + auto Count = Subrange->getCount(); + if (auto *Dependency = Count.dyn_cast<DIVariable *>()) + Result.push_back(Dependency); + } + } + return Result; +} + +/// Sort local variables so that variables appearing inside of helper +/// expressions come first. +static SmallVector<DbgVariable *, 8> +sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) { + SmallVector<DbgVariable *, 8> Result; + SmallVector<PointerIntPair<DbgVariable *, 1>, 8> WorkList; + // Map back from a DIVariable to its containing DbgVariable. + SmallDenseMap<const DILocalVariable *, DbgVariable *> DbgVar; + // Set of DbgVariables in Result. + SmallDenseSet<DbgVariable *, 8> Visited; + // For cycle detection. + SmallDenseSet<DbgVariable *, 8> Visiting; + + // Initialize the worklist and the DIVariable lookup table. + for (auto Var : reverse(Input)) { + DbgVar.insert({Var->getVariable(), Var}); + WorkList.push_back({Var, 0}); + } + + // Perform a stable topological sort by doing a DFS. 
+ while (!WorkList.empty()) { + auto Item = WorkList.back(); + DbgVariable *Var = Item.getPointer(); + bool visitedAllDependencies = Item.getInt(); + WorkList.pop_back(); + + // Dependency is in a different lexical scope or a global. + if (!Var) + continue; + + // Already handled. + if (Visited.count(Var)) + continue; + + // Add to Result if all dependencies are visited. + if (visitedAllDependencies) { + Visited.insert(Var); + Result.push_back(Var); + continue; + } + + // Detect cycles. + auto Res = Visiting.insert(Var); + if (!Res.second) { + assert(false && "dependency cycle in local variables"); + return Result; + } + + // Push dependencies and this node onto the worklist, so that this node is + // visited again after all of its dependencies are handled. + WorkList.push_back({Var, 1}); + for (auto *Dependency : dependencies(Var)) { + auto Dep = dyn_cast_or_null<const DILocalVariable>(Dependency); + WorkList.push_back({DbgVar[Dep], 0}); + } + } + return Result; +} + DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope, SmallVectorImpl<DIE *> &Children, bool *HasNonScopeChildren) { assert(Children.empty()); DIE *ObjectPointer = nullptr; - for (DbgVariable *DV : DU->getScopeVariables().lookup(Scope)) + // Emit function arguments (order is significant). + auto Vars = DU->getScopeVariables().lookup(Scope); + for (auto &DV : Vars.Args) + Children.push_back(constructVariableDIE(*DV.second, *Scope, ObjectPointer)); + + // Emit local variables. + auto Locals = sortLocalVars(Vars.Locals); + for (DbgVariable *DV : Locals) Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer)); // Skip imported directives in gmlt-like data. @@ -687,9 +801,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE( else EntityDie = getDIE(Entity); assert(EntityDie); - auto *File = Module->getFile(); - addSourceLine(*IMDie, Module->getLine(), File ? File->getFilename() : "", - File ? File->getDirectory() : ""); + addSourceLine(*IMDie, Module->getLine(), Module->getFile()); addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie); StringRef Name = Module->getName(); if (!Name.empty()) @@ -750,7 +862,7 @@ void DwarfCompileUnit::createAbstractVariable(const DILocalVariable *Var, void DwarfCompileUnit::emitHeader(bool UseOffsets) { // Don't bother labeling the .dwo unit, as its offset isn't used. - if (!Skeleton) { + if (!Skeleton && !DD->useSectionsAsReferences()) { LabelBegin = Asm->createTempSymbol("cu_begin"); Asm->OutStreamer->EmitLabel(LabelBegin); } @@ -759,6 +871,8 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) { : DD->useSplitDwarf() ? dwarf::DW_UT_skeleton : dwarf::DW_UT_compile; DwarfUnit::emitCommonHeader(UseOffsets, UT); + if (DD->getDwarfVersion() >= 5 && UT != dwarf::DW_UT_compile) + Asm->emitInt64(getDWOId()); } bool DwarfCompileUnit::hasDwarfPubSections() const { @@ -767,7 +881,8 @@ bool DwarfCompileUnit::hasDwarfPubSections() const { if (CUNode->getGnuPubnames()) return true; - return DD->tuneForGDB() && !includeMinimalInlineScopes(); + return DD->tuneForGDB() && DD->usePubSections() && + !includeMinimalInlineScopes(); } /// addGlobalName - Add a new global name to the compile unit. 
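The sortLocalVars/dependencies pair above does a stable, iterative topological sort so that variables referenced from DW_AT_count expressions (for example VLA bound artificials) get their DIEs before the array variables that use them. A minimal standalone sketch of the same worklist technique, using plain std containers and a generic Node type in place of DbgVariable; all names here are illustrative, not LLVM API:

#include <cassert>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

// Each node names the nodes it depends on (the analogue of a variable
// appearing in another variable's count: expression).
struct Node {
  std::string Name;
  std::vector<std::string> Deps;
};

// Stable topological sort: dependencies are emitted before their users,
// and otherwise the input order is preserved.  Re-entering a node that is
// still in Visiting indicates a dependency cycle.
std::vector<const Node *> sortByDependencies(const std::vector<Node> &Input) {
  std::unordered_map<std::string, const Node *> ByName;
  for (const Node &N : Input)
    ByName[N.Name] = &N;

  std::vector<const Node *> Result;
  std::vector<std::pair<const Node *, bool>> WorkList; // (node, deps done?)
  std::unordered_set<const Node *> Visited, Visiting;

  // Push in reverse so the front of Input is popped (and emitted) first.
  for (auto It = Input.rbegin(); It != Input.rend(); ++It)
    WorkList.push_back({&*It, false});

  while (!WorkList.empty()) {
    auto [N, DepsDone] = WorkList.back();
    WorkList.pop_back();
    if (!N || Visited.count(N))
      continue;                       // unknown dependency or already emitted
    if (DepsDone) {                   // all dependencies handled: emit it
      Visited.insert(N);
      Result.push_back(N);
      continue;
    }
    if (!Visiting.insert(N).second)   // seen while still in progress: cycle
      continue;
    WorkList.push_back({N, true});    // revisit after the dependencies
    for (const std::string &D : N->Deps)
      WorkList.push_back({ByName.count(D) ? ByName[D] : nullptr, false});
  }
  return Result;
}

As in the patch, a dependency that cannot be resolved (a different scope or a global in the real code) is pushed as a null entry and simply skipped, and cycles terminate instead of recursing forever.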
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 68482eb7e358..51e1558fe4a3 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -83,7 +83,10 @@ class DwarfCompileUnit final : public DwarfUnit { DenseMap<const MDNode *, DIE *> AbstractSPDies; DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables; - /// \brief Construct a DIE for the given DbgVariable without initializing the + /// DWO ID for correlating skeleton and split units. + uint64_t DWOId = 0; + + /// Construct a DIE for the given DbgVariable without initializing the /// DbgVariable's DIE reference. DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract); @@ -141,7 +144,7 @@ public: DwarfCompileUnit &getCU() override { return *this; } - unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override; + unsigned getOrCreateSourceID(const DIFile *File) override; void addImportedEntity(const DIImportedEntity* IE) { DIScope *Scope = IE->getScope(); @@ -159,7 +162,7 @@ public: void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End); - /// \brief Find DIE for the given subprogram and attach appropriate + /// Find DIE for the given subprogram and attach appropriate /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global /// variables in this scope then create and insert DIEs for these /// variables. @@ -168,7 +171,7 @@ public: void constructScopeDIE(LexicalScope *Scope, SmallVectorImpl<DIE *> &FinalChildren); - /// \brief A helper function to construct a RangeSpanList for a given + /// A helper function to construct a RangeSpanList for a given /// lexical scope. void addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range); @@ -177,11 +180,11 @@ public: void attachRangesOrLowHighPC(DIE &D, const SmallVectorImpl<InsnRange> &Ranges); - /// \brief This scope represents inlined body of a function. Construct + /// This scope represents inlined body of a function. Construct /// DIE to represent this concrete inlined copy of the function. DIE *constructInlinedScopeDIE(LexicalScope *Scope); - /// \brief Construct new DW_TAG_lexical_block for this scope and + /// Construct new DW_TAG_lexical_block for this scope and /// attach DW_AT_low_pc/DW_AT_high_pc labels. DIE *constructLexicalScopeDIE(LexicalScope *Scope); @@ -196,14 +199,14 @@ public: SmallVectorImpl<DIE *> &Children, bool *HasNonScopeChildren = nullptr); - /// \brief Construct a DIE for this subprogram scope. + /// Construct a DIE for this subprogram scope. void constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope); DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE); void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); - /// \brief Construct import_module DIE. + /// Construct import_module DIE. DIE *constructImportedEntityDIE(const DIImportedEntity *Module); void finishSubprogramDefinition(const DISubprogram *SP); @@ -214,11 +217,18 @@ public: DbgVariable *getExistingAbstractVariable(InlinedVariable IV, const DILocalVariable *&Cleansed); DbgVariable *getExistingAbstractVariable(InlinedVariable IV); - void createAbstractVariable(const DILocalVariable *DV, LexicalScope *Scope); + void createAbstractVariable(const DILocalVariable *Var, LexicalScope *Scope); /// Set the skeleton unit associated with this unit. 
void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; } + unsigned getHeaderSize() const override { + // DWARF v5 added the DWO ID to the header for split/skeleton units. + unsigned DWOIdSize = + DD->getDwarfVersion() >= 5 && DD->useSplitDwarf() ? sizeof(uint64_t) + : 0; + return DwarfUnit::getHeaderSize() + DWOIdSize; + } unsigned getLength() { return sizeof(uint32_t) + // Length field getHeaderSize() + getUnitDie().getSize(); @@ -290,6 +300,9 @@ public: void setBaseAddress(const MCSymbol *Base) { BaseAddress = Base; } const MCSymbol *getBaseAddress() const { return BaseAddress; } + uint64_t getDWOId() const { return DWOId; } + void setDWOId(uint64_t DwoId) { DWOId = DwoId; } + bool hasDwarfPubSections() const; }; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 2c9c7d4f3146..8761fae9dd22 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -16,7 +16,6 @@ #include "DIEHash.h" #include "DebugLocEntry.h" #include "DebugLocStream.h" -#include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" #include "DwarfExpression.h" #include "DwarfFile.h" @@ -31,6 +30,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AccelTable.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/LexicalScopes.h" @@ -39,7 +39,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" @@ -66,6 +65,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include <algorithm> @@ -94,6 +94,11 @@ static cl::opt<bool> GenerateARangeSection("generate-arange-section", cl::desc("Generate dwarf aranges"), cl::init(false)); +static cl::opt<bool> + GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, + cl::desc("Generate DWARF4 type units."), + cl::init(false)); + static cl::opt<bool> SplitDwarfCrossCuReferences( "split-dwarf-cross-cu-references", cl::Hidden, cl::desc("Enable cross-cu references in DWO files"), cl::init(false)); @@ -107,14 +112,40 @@ static cl::opt<DefaultOnOff> UnknownLocations( clEnumVal(Enable, "In all cases"), clEnumVal(Disable, "Never")), cl::init(Default)); +static cl::opt<AccelTableKind> AccelTables( + "accel-tables", cl::Hidden, cl::desc("Output dwarf accelerator tables."), + cl::values(clEnumValN(AccelTableKind::Default, "Default", + "Default for platform"), + clEnumValN(AccelTableKind::None, "Disable", "Disabled."), + clEnumValN(AccelTableKind::Apple, "Apple", "Apple"), + clEnumValN(AccelTableKind::Dwarf, "Dwarf", "DWARF")), + cl::init(AccelTableKind::Default)); + static cl::opt<DefaultOnOff> -DwarfAccelTables("dwarf-accel-tables", cl::Hidden, - cl::desc("Output prototype dwarf accelerator tables."), +DwarfInlinedStrings("dwarf-inlined-strings", cl::Hidden, + cl::desc("Use inlined strings rather than string section."), cl::values(clEnumVal(Default, "Default for platform"), clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")), cl::init(Default)); +static cl::opt<bool> + NoDwarfPubSections("no-dwarf-pub-sections", 
cl::Hidden, + cl::desc("Disable emission of DWARF pub sections."), + cl::init(false)); + +static cl::opt<bool> + NoDwarfRangesSection("no-dwarf-ranges-section", cl::Hidden, + cl::desc("Disable emission .debug_ranges section."), + cl::init(false)); + +static cl::opt<DefaultOnOff> DwarfSectionsAsReferences( + "dwarf-sections-as-references", cl::Hidden, + cl::desc("Use sections+offset as references rather than labels."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")), + cl::init(Default)); + enum LinkageNameOption { DefaultLinkageNames, AllLinkageNames, @@ -215,11 +246,11 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const { return A.Expr->isFragment(); }) && "multiple FI expressions without DW_OP_LLVM_fragment"); - std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(), - [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool { - return A.Expr->getFragmentInfo()->OffsetInBits < - B.Expr->getFragmentInfo()->OffsetInBits; - }); + llvm::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(), + [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool { + return A.Expr->getFragmentInfo()->OffsetInBits < + B.Expr->getFragmentInfo()->OffsetInBits; + }); return FrameIndexExprs; } @@ -258,23 +289,34 @@ void DbgVariable::addMMIEntry(const DbgVariable &V) { "conflicting locations for variable"); } -static const DwarfAccelTable::Atom TypeAtoms[] = { - DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), - DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), - DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; +static AccelTableKind computeAccelTableKind(unsigned DwarfVersion, + bool GenerateTypeUnits, + DebuggerKind Tuning, + const Triple &TT) { + // Honor an explicit request. + if (AccelTables != AccelTableKind::Default) + return AccelTables; + + // Accelerator tables with type units are currently not supported. + if (GenerateTypeUnits) + return AccelTableKind::None; + + // Accelerator tables get emitted if targetting DWARF v5 or LLDB. DWARF v5 + // always implies debug_names. For lower standard versions we use apple + // accelerator tables on apple platforms and debug_names elsewhere. + if (DwarfVersion >= 5) + return AccelTableKind::Dwarf; + if (Tuning == DebuggerKind::LLDB) + return TT.isOSBinFormatMachO() ? AccelTableKind::Apple + : AccelTableKind::Dwarf; + return AccelTableKind::None; +} DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) : DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()), InfoHolder(A, "info_string", DIEValueAllocator), SkeletonHolder(A, "skel_string", DIEValueAllocator), - IsDarwin(A->TM.getTargetTriple().isOSDarwin()), - AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, - dwarf::DW_FORM_data4)), - AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, - dwarf::DW_FORM_data4)), - AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, - dwarf::DW_FORM_data4)), - AccelTypes(TypeAtoms) { + IsDarwin(A->TM.getTargetTriple().isOSDarwin()) { const Triple &TT = Asm->TM.getTargetTriple(); // Make sure we know our "debugger tuning." The target option takes @@ -288,11 +330,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) else DebuggerTuning = DebuggerKind::GDB; - // Turn on accelerator tables for LLDB by default. 
- if (DwarfAccelTables == Default) - HasDwarfAccelTables = tuneForLLDB(); + if (DwarfInlinedStrings == Default) + UseInlineStrings = TT.isNVPTX(); else - HasDwarfAccelTables = DwarfAccelTables == Enable; + UseInlineStrings = DwarfInlinedStrings == Enable; + + UseLocSection = !TT.isNVPTX(); HasAppleExtensionAttributes = tuneForLLDB(); @@ -308,8 +351,23 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion; unsigned DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber : MMI->getModule()->getDwarfVersion(); - // Use dwarf 4 by default if nothing is requested. - DwarfVersion = DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION; + // Use dwarf 4 by default if nothing is requested. For NVPTX, use dwarf 2. + DwarfVersion = + TT.isNVPTX() ? 2 : (DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION); + + UsePubSections = !NoDwarfPubSections && !TT.isNVPTX(); + UseRangesSection = !NoDwarfRangesSection && !TT.isNVPTX(); + + // Use sections as references. Force for NVPTX. + if (DwarfSectionsAsReferences == Default) + UseSectionsAsReferences = TT.isNVPTX(); + else + UseSectionsAsReferences = DwarfSectionsAsReferences == Enable; + + GenerateTypeUnits = GenerateDwarfTypeUnits; + + TheAccelTableKind = computeAccelTableKind( + DwarfVersion, GenerateTypeUnits, DebuggerTuning, A->TM.getTargetTriple()); // Work around a GDB bug. GDB doesn't support the standard opcode; // SCE doesn't support GNU's; LLDB prefers the standard opcode, which @@ -321,6 +379,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) // GDB does not fully support the DWARF 4 representation for bitfields. UseDWARF2Bitfields = (DwarfVersion < 4) || tuneForGDB(); + // The DWARF v5 string offsets table has - possibly shared - contributions + // from each compile and type unit each preceded by a header. The string + // offsets table used by the pre-DWARF v5 split-DWARF implementation uses + // a monolithic string offsets table without any header. + UseSegmentedStringOffsetsTable = DwarfVersion >= 5; + Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion); } @@ -355,17 +419,18 @@ static StringRef getObjCMethodName(StringRef In) { } // Add the various names to the Dwarf accelerator table names. -// TODO: Determine whether or not we should add names for programs -// that do not have a DW_AT_name or DW_AT_linkage_name field - this -// is only slightly different than the lookup of non-standard ObjC names. void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) { if (!SP->isDefinition()) return; - addAccelName(SP->getName(), Die); - // If the linkage name is different than the name, go ahead and output - // that as well into the name table. - if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName()) + if (SP->getName() != "") + addAccelName(SP->getName(), Die); + + // If the linkage name is different than the name, go ahead and output that as + // well into the name table. Only do that if we are going to actually emit + // that name. + if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() && + (useAllLinkageNames() || InfoHolder.getAbstractSPDies().lookup(SP))) addAccelName(SP->getLinkageName(), Die); // If this is an Objective-C selector name add it to the ObjC accelerator @@ -471,8 +536,9 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { // explicitly describe the directory of all files, never relying on the // compilation directory. 
if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU) - Asm->OutStreamer->getContext().setMCLineTableCompilationDir( - NewCU.getUniqueID(), CompilationDir); + Asm->OutStreamer->emitDwarfFile0Directive( + CompilationDir, FN, NewCU.getMD5AsBytes(DIUnit->getFile()), + DIUnit->getSource(), NewCU.getUniqueID()); StringRef Producer = DIUnit->getProducer(); StringRef Flags = DIUnit->getFlags(); @@ -486,6 +552,10 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { DIUnit->getSourceLanguage()); NewCU.addString(Die, dwarf::DW_AT_name, FN); + // Add DW_str_offsets_base to the unit DIE, except for split units. + if (useSegmentedStringOffsetsTable() && !useSplitDwarf()) + NewCU.addStringOffsetsStart(); + if (!useSplitDwarf()) { NewCU.initStmtList(); @@ -541,21 +611,22 @@ void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, /// Sort and unique GVEs by comparing their fragment offset. static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> & sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) { - std::sort(GVEs.begin(), GVEs.end(), - [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) { - // Sort order: first null exprs, then exprs without fragment - // info, then sort by fragment offset in bits. - // FIXME: Come up with a more comprehensive comparator so - // the sorting isn't non-deterministic, and so the following - // std::unique call works correctly. - if (!A.Expr || !B.Expr) - return !!B.Expr; - auto FragmentA = A.Expr->getFragmentInfo(); - auto FragmentB = B.Expr->getFragmentInfo(); - if (!FragmentA || !FragmentB) - return !!FragmentB; - return FragmentA->OffsetInBits < FragmentB->OffsetInBits; - }); + llvm::sort(GVEs.begin(), GVEs.end(), + [](DwarfCompileUnit::GlobalExpr A, + DwarfCompileUnit::GlobalExpr B) { + // Sort order: first null exprs, then exprs without fragment + // info, then sort by fragment offset in bits. + // FIXME: Come up with a more comprehensive comparator so + // the sorting isn't non-deterministic, and so the following + // std::unique call works correctly. + if (!A.Expr || !B.Expr) + return !!B.Expr; + auto FragmentA = A.Expr->getFragmentInfo(); + auto FragmentB = B.Expr->getFragmentInfo(); + if (!FragmentA || !FragmentB) + return !!FragmentB; + return FragmentA->OffsetInBits < FragmentB->OffsetInBits; + }); GVEs.erase(std::unique(GVEs.begin(), GVEs.end(), [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) { @@ -590,6 +661,19 @@ void DwarfDebug::beginModule() { GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()}); } + // Create the symbol that designates the start of the unit's contribution + // to the string offsets table. In a split DWARF scenario, only the skeleton + // unit has the DW_AT_str_offsets_base attribute (and hence needs the symbol). + if (useSegmentedStringOffsetsTable()) + (useSplitDwarf() ? SkeletonHolder : InfoHolder) + .setStringOffsetsStartSym(Asm->createTempSymbol("str_offsets_base")); + + // Create the symbol that designates the start of the DWARF v5 range list + // table. It is located past the header and before the offsets table. + if (getDwarfVersion() >= 5) + (useSplitDwarf() ? 
SkeletonHolder : InfoHolder) + .setRnglistsTableBaseSym(Asm->createTempSymbol("rnglists_table_base")); + for (DICompileUnit *CUNode : M->debug_compile_units()) { // FIXME: Move local imported entities into a list attached to the // subprogram, then this search won't be needed and a @@ -694,11 +778,15 @@ void DwarfDebug::finalizeModuleInfo() { // Emit a unique identifier for this CU. uint64_t ID = DIEHash(Asm).computeCUSignature(DWOName, TheCU.getUnitDie()); - TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id, - dwarf::DW_FORM_data8, ID); - SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, - dwarf::DW_FORM_data8, ID); - + if (getDwarfVersion() >= 5) { + TheCU.setDWOId(ID); + SkCU->setDWOId(ID); + } else { + TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + } // We don't keep track of which addresses are used in which CU so this // is a bit pessimistic under LTO. if (!AddrPool.isEmpty()) { @@ -706,7 +794,7 @@ void DwarfDebug::finalizeModuleInfo() { SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base, Sym, Sym); } - if (!SkCU->getRangeLists().empty()) { + if (getDwarfVersion() < 5 && !SkCU->getRangeLists().empty()) { const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol(); SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base, Sym, Sym); @@ -721,7 +809,7 @@ void DwarfDebug::finalizeModuleInfo() { // ranges for all subprogram DIEs for mach-o. DwarfCompileUnit &U = SkCU ? *SkCU : TheCU; if (unsigned NumRanges = TheCU.getRanges().size()) { - if (NumRanges > 1) + if (NumRanges > 1 && useRangesSection()) // A DW_AT_low_pc attribute may also be specified in combination with // DW_AT_ranges to specify the default base address for use in // location lists (see Section 2.6.2) and range lists (see Section @@ -732,6 +820,10 @@ void DwarfDebug::finalizeModuleInfo() { U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges()); } + if (getDwarfVersion() >= 5 && !useSplitDwarf() && + !U.getRangeLists().empty()) + U.addRnglistsBase(); + auto *CUNode = cast<DICompileUnit>(P.first); // If compile Unit has macros, emit "DW_AT_macro_info" attribute. if (CUNode->getMacros()) @@ -799,11 +891,20 @@ void DwarfDebug::endModule() { } // Emit info into the dwarf accelerator table sections. - if (useDwarfAccelTables()) { + switch (getAccelTableKind()) { + case AccelTableKind::Apple: emitAccelNames(); emitAccelObjC(); emitAccelNamespaces(); emitAccelTypes(); + break; + case AccelTableKind::Dwarf: + emitAccelDebugNames(); + break; + case AccelTableKind::None: + break; + case AccelTableKind::Default: + llvm_unreachable("Default should have already been resolved."); } // Emit the pubnames and pubtypes sections if requested. @@ -887,7 +988,7 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) { llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!"); } -/// \brief If this and Next are describing different fragments of the same +/// If this and Next are describing different fragments of the same /// variable, merge them by appending Next's values to the current /// list of values. /// Return true if the merge was successful. @@ -903,8 +1004,7 @@ bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) { // sorted. 
for (unsigned i = 0, j = 0; i < Values.size(); ++i) { for (; j < Next.Values.size(); ++j) { - int res = DebugHandlerBase::fragmentCmp( - cast<DIExpression>(Values[i].Expression), + int res = cast<DIExpression>(Values[i].Expression)->fragmentCmp( cast<DIExpression>(Next.Values[j].Expression)); if (res == 0) // The two expressions overlap, we can't merge. return false; @@ -967,7 +1067,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, // If this fragment overlaps with any open ranges, truncate them. const DIExpression *DIExpr = Begin->getDebugExpression(); auto Last = remove_if(OpenRanges, [&](DebugLocEntry::Value R) { - return fragmentsOverlap(DIExpr, R.getExpression()); + return DIExpr->fragmentsOverlap(R.getExpression()); }); OpenRanges.erase(Last, OpenRanges.end()); @@ -983,7 +1083,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, EndLabel = getLabelBeforeInsn(std::next(I)->first); assert(EndLabel && "Forgot label after instruction ending a range!"); - DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n"); + LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n"); auto Value = getDebugLocValue(Begin); DebugLocEntry Loc(StartLabel, EndLabel, Value); @@ -1012,7 +1112,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, // Attempt to coalesce the ranges of two otherwise identical // DebugLocEntries. auto CurEntry = DebugLoc.rbegin(); - DEBUG({ + LLVM_DEBUG({ dbgs() << CurEntry->getValues().size() << " Values:\n"; for (auto &Value : CurEntry->getValues()) Value.dump(); @@ -1131,6 +1231,9 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, RegVar->initializeDbgValue(MInsn); continue; } + // Do not emit location lists if .debug_loc secton is disabled. + if (!useLocSection()) + continue; // Handle multiple DBG_VALUE instructions describing one variable. DebugLocStream::ListBuilder List(DebugLocs, TheCU, *Asm, *RegVar, *MInsn); @@ -1151,10 +1254,12 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, } // Collect info for variables that were optimized out. - for (const DILocalVariable *DV : SP->getVariables()) { - if (Processed.insert(InlinedVariable(DV, nullptr)).second) - if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope())) - createConcreteVariable(TheCU, *Scope, InlinedVariable(DV, nullptr)); + for (const DINode *DN : SP->getRetainedNodes()) { + if (auto *DV = dyn_cast<DILocalVariable>(DN)) { + if (Processed.insert(InlinedVariable(DV, nullptr)).second) + if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope())) + createConcreteVariable(TheCU, *Scope, InlinedVariable(DV, nullptr)); + } } } @@ -1168,7 +1273,9 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { return; // Check if source location changes, but ignore DBG_VALUE and CFI locations. - if (MI->isMetaInstruction()) + // If the instruction is part of the function frame setup code, do not emit + // any line record, as there is no correspondence with any user code. + if (MI->isMetaInstruction() || MI->getFlag(MachineInstr::FrameSetup)) return; const DebugLoc &DL = MI->getDebugLoc(); // When we emit a line-0 record, we don't update PrevInstLoc; so look at @@ -1333,14 +1440,16 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { // Construct abstract scopes. for (LexicalScope *AScope : LScopes.getAbstractScopesList()) { auto *SP = cast<DISubprogram>(AScope->getScopeNode()); - // Collect info for variables that were optimized out. 
- for (const DILocalVariable *DV : SP->getVariables()) { - if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second) - continue; - ensureAbstractVariableIsCreated(TheCU, InlinedVariable(DV, nullptr), - DV->getScope()); - assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes - && "ensureAbstractVariableIsCreated inserted abstract scopes"); + for (const DINode *DN : SP->getRetainedNodes()) { + if (auto *DV = dyn_cast<DILocalVariable>(DN)) { + // Collect info for variables that were optimized out. + if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second) + continue; + ensureAbstractVariableIsCreated(TheCU, InlinedVariable(DV, nullptr), + DV->getScope()); + assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes + && "ensureAbstractVariableIsCreated inserted abstract scopes"); + } } constructAbstractSubprogramScopeDIE(TheCU, AScope); } @@ -1366,21 +1475,19 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, unsigned Flags) { StringRef Fn; - StringRef Dir; - unsigned Src = 1; + unsigned FileNo = 1; unsigned Discriminator = 0; if (auto *Scope = cast_or_null<DIScope>(S)) { Fn = Scope->getFilename(); - Dir = Scope->getDirectory(); if (Line != 0 && getDwarfVersion() >= 4) if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope)) Discriminator = LBF->getDiscriminator(); unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID(); - Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID]) - .getOrCreateSourceID(Fn, Dir); + FileNo = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID]) + .getOrCreateSourceID(Scope->getFile()); } - Asm->OutStreamer->EmitDwarfLocDirective(Src, Line, Col, Flags, 0, + Asm->OutStreamer->EmitDwarfLocDirective(FileNo, Line, Col, Flags, 0, Discriminator, Fn); } @@ -1401,13 +1508,30 @@ void DwarfDebug::emitAbbreviations() { Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection()); } -void DwarfDebug::emitAccel(DwarfAccelTable &Accel, MCSection *Section, +void DwarfDebug::emitStringOffsetsTableHeader() { + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + Holder.getStringPool().emitStringOffsetsTableHeader( + *Asm, Asm->getObjFileLowering().getDwarfStrOffSection(), + Holder.getStringOffsetsStartSym()); +} + +template <typename AccelTableT> +void DwarfDebug::emitAccel(AccelTableT &Accel, MCSection *Section, StringRef TableName) { - Accel.FinalizeTable(Asm, TableName); Asm->OutStreamer->SwitchSection(Section); // Emit the full data. - Accel.emit(Asm, Section->getBeginSymbol(), this); + emitAppleAccelTable(Asm, Accel, TableName, Section->getBeginSymbol()); +} + +void DwarfDebug::emitAccelDebugNames() { + // Don't emit anything if we have no compilation units to index. + if (getUnits().empty()) + return; + + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfDebugNamesSection()); + emitDWARF5AccelTable(Asm, AccelDebugNames, *this, getUnits()); } // Emit visible names into a hashed accelerator table section. 
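The emission switch above dispatches on the AccelTableKind that computeAccelTableKind selected earlier in this diff. A rough sketch of that selection policy under simplified assumptions: IsMachO stands in for the Triple::isOSBinFormatMachO() check, the -accel-tables command-line override is omitted, and pickAccelTableKind is an illustrative stand-in, not LLVM API:

#include <cstdio>

enum class AccelTableKind { Default, None, Apple, Dwarf };
enum class DebuggerKind { Default, GDB, LLDB, SCE };

// Mirrors the policy described above: type units currently disable the
// tables, DWARF v5 always implies .debug_names, and LLDB tuning picks the
// Apple tables on Mach-O and .debug_names elsewhere.
AccelTableKind pickAccelTableKind(unsigned DwarfVersion, bool GenerateTypeUnits,
                                  DebuggerKind Tuning, bool IsMachO) {
  if (GenerateTypeUnits)
    return AccelTableKind::None;
  if (DwarfVersion >= 5)
    return AccelTableKind::Dwarf;
  if (Tuning == DebuggerKind::LLDB)
    return IsMachO ? AccelTableKind::Apple : AccelTableKind::Dwarf;
  return AccelTableKind::None;
}

int main() {
  // LLDB tuning on a Mach-O target with DWARF v4 selects the Apple tables.
  std::printf("%d\n", static_cast<int>(pickAccelTableKind(
                          4, false, DebuggerKind::LLDB, true)));
}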
@@ -1525,6 +1649,14 @@ void DwarfDebug::emitDebugPubSections() { } } +void DwarfDebug::emitSectionReference(const DwarfCompileUnit &CU) { + if (useSectionsAsReferences()) + Asm->EmitDwarfOffset(CU.getSection()->getBeginSymbol(), + CU.getDebugSectionOffset()); + else + Asm->emitDwarfSymbolReference(CU.getLabelBegin()); +} + void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name, DwarfCompileUnit *TheU, const StringMap<const DIE *> &Globals) { @@ -1540,13 +1672,13 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name, Asm->OutStreamer->EmitLabel(BeginLabel); Asm->OutStreamer->AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION); + Asm->emitInt16(dwarf::DW_PUBNAMES_VERSION); Asm->OutStreamer->AddComment("Offset of Compilation Unit Info"); - Asm->emitDwarfSymbolReference(TheU->getLabelBegin()); + emitSectionReference(*TheU); Asm->OutStreamer->AddComment("Compilation Unit Length"); - Asm->EmitInt32(TheU->getLength()); + Asm->emitInt32(TheU->getLength()); // Emit the pubnames for this compilation unit. for (const auto &GI : Globals) { @@ -1554,14 +1686,14 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name, const DIE *Entity = GI.second; Asm->OutStreamer->AddComment("DIE offset"); - Asm->EmitInt32(Entity->getOffset()); + Asm->emitInt32(Entity->getOffset()); if (GnuStyle) { dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity); Asm->OutStreamer->AddComment( Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); - Asm->EmitInt8(Desc.toBits()); + Asm->emitInt8(Desc.toBits()); } Asm->OutStreamer->AddComment("External Name"); @@ -1569,14 +1701,20 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name, } Asm->OutStreamer->AddComment("End Mark"); - Asm->EmitInt32(0); + Asm->emitInt32(0); Asm->OutStreamer->EmitLabel(EndLabel); } /// Emit null-terminated strings into a debug str section. void DwarfDebug::emitDebugStr() { + MCSection *StringOffsetsSection = nullptr; + if (useSegmentedStringOffsetsTable()) { + emitStringOffsetsTableHeader(); + StringOffsetsSection = Asm->getObjFileLowering().getDwarfStrOffSection(); + } DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); + Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection(), + StringOffsetsSection, /* UseRelativeOffsets = */ true); } void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, @@ -1589,7 +1727,6 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, } static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, - ByteStreamer &Streamer, const DebugLocEntry::Value &Value, DwarfExpression &DwarfExpr) { auto *DIExpr = Value.getExpression(); @@ -1634,11 +1771,11 @@ void DebugLocEntry::finalize(const AsmPrinter &AP, "fragments are expected to be sorted"); for (auto Fragment : Values) - emitDebugLocValue(AP, BT, Streamer, Fragment, DwarfExpr); + emitDebugLocValue(AP, BT, Fragment, DwarfExpr); } else { assert(Values.size() == 1 && "only fragments may have >1 value"); - emitDebugLocValue(AP, BT, Streamer, Value, DwarfExpr); + emitDebugLocValue(AP, BT, Value, DwarfExpr); } DwarfExpr.finalize(); } @@ -1646,7 +1783,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP, void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) { // Emit the size. 
Asm->OutStreamer->AddComment("Loc expr size"); - Asm->EmitInt16(DebugLocs.getBytes(Entry).size()); + Asm->emitInt16(DebugLocs.getBytes(Entry).size()); // Emit the entry. APByteStreamer Streamer(*Asm); @@ -1694,14 +1831,14 @@ void DwarfDebug::emitDebugLocDWO() { // rather than two. We could get fancier and try to, say, reuse an // address we know we've emitted elsewhere (the start of the function? // The start of the CU or CU subrange that encloses this range?) - Asm->EmitInt8(dwarf::DW_LLE_startx_length); + Asm->emitInt8(dwarf::DW_LLE_startx_length); unsigned idx = AddrPool.getIndex(Entry.BeginSym); Asm->EmitULEB128(idx); Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4); emitDebugLocEntryLocation(Entry); } - Asm->EmitInt8(dwarf::DW_LLE_end_of_list); + Asm->emitInt8(dwarf::DW_LLE_end_of_list); } } @@ -1752,7 +1889,7 @@ void DwarfDebug::emitDebugARanges() { } // Sort the symbols by offset within the section. - std::sort( + std::stable_sort( List.begin(), List.end(), [&](const SymbolCU &A, const SymbolCU &B) { unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0; unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0; @@ -1801,10 +1938,10 @@ void DwarfDebug::emitDebugARanges() { } // Sort the CU list (again, to ensure consistent output order). - std::sort(CUs.begin(), CUs.end(), - [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) { - return A->getUniqueID() < B->getUniqueID(); - }); + llvm::sort(CUs.begin(), CUs.end(), + [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) { + return A->getUniqueID() < B->getUniqueID(); + }); // Emit an arange table for each CU we used. for (DwarfCompileUnit *CU : CUs) { @@ -1832,15 +1969,15 @@ void DwarfDebug::emitDebugARanges() { // For each compile unit, write the list of spans it covers. Asm->OutStreamer->AddComment("Length of ARange Set"); - Asm->EmitInt32(ContentSize); + Asm->emitInt32(ContentSize); Asm->OutStreamer->AddComment("DWARF Arange version number"); - Asm->EmitInt16(dwarf::DW_ARANGES_VERSION); + Asm->emitInt16(dwarf::DW_ARANGES_VERSION); Asm->OutStreamer->AddComment("Offset Into Debug Info Section"); - Asm->emitDwarfSymbolReference(CU->getLabelBegin()); + emitSectionReference(*CU); Asm->OutStreamer->AddComment("Address Size (in bytes)"); - Asm->EmitInt8(PtrSize); + Asm->emitInt8(PtrSize); Asm->OutStreamer->AddComment("Segment Size (in bytes)"); - Asm->EmitInt8(0); + Asm->emitInt8(0); Asm->OutStreamer->emitFill(Padding, 0xff); @@ -1867,17 +2004,151 @@ void DwarfDebug::emitDebugARanges() { } } -/// Emit address ranges into a debug ranges section. +/// Emit a single range list. We handle both DWARF v5 and earlier. +static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU, + const RangeSpanList &List) { + + auto DwarfVersion = CU->getDwarfVersion(); + // Emit our symbol so we can find the beginning of the range. + Asm->OutStreamer->EmitLabel(List.getSym()); + // Gather all the ranges that apply to the same section so they can share + // a base address entry. + MapVector<const MCSection *, std::vector<const RangeSpan *>> SectionRanges; + // Size for our labels. 
+ auto Size = Asm->MAI->getCodePointerSize(); + + for (const RangeSpan &Range : List.getRanges()) + SectionRanges[&Range.getStart()->getSection()].push_back(&Range); + + auto *CUBase = CU->getBaseAddress(); + bool BaseIsSet = false; + for (const auto &P : SectionRanges) { + // Don't bother with a base address entry if there's only one range in + // this section in this range list - for example ranges for a CU will + // usually consist of single regions from each of many sections + // (-ffunction-sections, or just C++ inline functions) except under LTO + // or optnone where there may be holes in a single CU's section + // contributions. + auto *Base = CUBase; + if (!Base && P.second.size() > 1 && + (UseDwarfRangesBaseAddressSpecifier || DwarfVersion >= 5)) { + BaseIsSet = true; + // FIXME/use care: This may not be a useful base address if it's not + // the lowest address/range in this object. + Base = P.second.front()->getStart(); + if (DwarfVersion >= 5) { + Asm->OutStreamer->AddComment("DW_RLE_base_address"); + Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_base_address, 1); + } else + Asm->OutStreamer->EmitIntValue(-1, Size); + Asm->OutStreamer->AddComment(" base address"); + Asm->OutStreamer->EmitSymbolValue(Base, Size); + } else if (BaseIsSet && DwarfVersion < 5) { + BaseIsSet = false; + assert(!Base); + Asm->OutStreamer->EmitIntValue(-1, Size); + Asm->OutStreamer->EmitIntValue(0, Size); + } + + for (const auto *RS : P.second) { + const MCSymbol *Begin = RS->getStart(); + const MCSymbol *End = RS->getEnd(); + assert(Begin && "Range without a begin symbol?"); + assert(End && "Range without an end symbol?"); + if (Base) { + if (DwarfVersion >= 5) { + // Emit DW_RLE_offset_pair when we have a base. + Asm->OutStreamer->AddComment("DW_RLE_offset_pair"); + Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_offset_pair, 1); + Asm->OutStreamer->AddComment(" starting offset"); + Asm->EmitLabelDifferenceAsULEB128(Begin, Base); + Asm->OutStreamer->AddComment(" ending offset"); + Asm->EmitLabelDifferenceAsULEB128(End, Base); + } else { + Asm->EmitLabelDifference(Begin, Base, Size); + Asm->EmitLabelDifference(End, Base, Size); + } + } else if (DwarfVersion >= 5) { + Asm->OutStreamer->AddComment("DW_RLE_start_length"); + Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_start_length, 1); + Asm->OutStreamer->AddComment(" start"); + Asm->OutStreamer->EmitSymbolValue(Begin, Size); + Asm->OutStreamer->AddComment(" length"); + Asm->EmitLabelDifferenceAsULEB128(End, Begin); + } else { + Asm->OutStreamer->EmitSymbolValue(Begin, Size); + Asm->OutStreamer->EmitSymbolValue(End, Size); + } + } + } + if (DwarfVersion >= 5) { + Asm->OutStreamer->AddComment("DW_RLE_end_of_list"); + Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_end_of_list, 1); + } else { + // Terminate the list with two 0 values. + Asm->OutStreamer->EmitIntValue(0, Size); + Asm->OutStreamer->EmitIntValue(0, Size); + } +} + +// Emit the header of a DWARF 5 range list table. Returns the symbol that +// designates the end of the table for the caller to emit when the table is +// complete. +static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, DwarfFile &Holder) { + // The length is described by a starting label right after the length field + // and an end label. + MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start"); + MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end"); + // Build the range table header, which starts with the length field. 
+ Asm->EmitLabelDifference(TableEnd, TableStart, 4); + Asm->OutStreamer->EmitLabel(TableStart); + // Version number (DWARF v5 and later). + Asm->emitInt16(Asm->OutStreamer->getContext().getDwarfVersion()); + // Address size. + Asm->emitInt8(Asm->MAI->getCodePointerSize()); + // Segment selector size. + Asm->emitInt8(0); + + MCSymbol *RnglistTableBaseSym = Holder.getRnglistsTableBaseSym(); + + // FIXME: Generate the offsets table and use DW_FORM_rnglistx with the + // DW_AT_ranges attribute. Until then set the number of offsets to 0. + Asm->emitInt32(0); + Asm->OutStreamer->EmitLabel(RnglistTableBaseSym); + return TableEnd; +} + +/// Emit address ranges into the .debug_ranges section or into the DWARF v5 +/// .debug_rnglists section. void DwarfDebug::emitDebugRanges() { if (CUMap.empty()) return; - // Start the dwarf ranges section. - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfRangesSection()); + auto NoRangesPresent = [this]() { + return llvm::all_of( + CUMap, [](const decltype(CUMap)::const_iterator::value_type &Pair) { + return Pair.second->getRangeLists().empty(); + }); + }; - // Size for our labels. - unsigned char Size = Asm->MAI->getCodePointerSize(); + if (!useRangesSection()) { + assert(NoRangesPresent() && "No debug ranges expected."); + return; + } + + if (getDwarfVersion() >= 5 && NoRangesPresent()) + return; + + // Start the dwarf ranges section. + MCSymbol *TableEnd = nullptr; + if (getDwarfVersion() >= 5) { + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfRnglistsSection()); + TableEnd = emitRnglistsTableHeader(Asm, useSplitDwarf() ? SkeletonHolder + : InfoHolder); + } else + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfRangesSection()); // Grab the specific ranges for the compile units in the module. for (const auto &I : CUMap) { @@ -1887,61 +2158,12 @@ void DwarfDebug::emitDebugRanges() { TheCU = Skel; // Iterate over the misc ranges for the compile units in the module. - for (const RangeSpanList &List : TheCU->getRangeLists()) { - // Emit our symbol so we can find the beginning of the range. - Asm->OutStreamer->EmitLabel(List.getSym()); - - // Gather all the ranges that apply to the same section so they can share - // a base address entry. - MapVector<const MCSection *, std::vector<const RangeSpan *>> MV; - for (const RangeSpan &Range : List.getRanges()) { - MV[&Range.getStart()->getSection()].push_back(&Range); - } - - auto *CUBase = TheCU->getBaseAddress(); - bool BaseIsSet = false; - for (const auto &P : MV) { - // Don't bother with a base address entry if there's only one range in - // this section in this range list - for example ranges for a CU will - // usually consist of single regions from each of many sections - // (-ffunction-sections, or just C++ inline functions) except under LTO - // or optnone where there may be holes in a single CU's section - // contrubutions. - auto *Base = CUBase; - if (!Base && P.second.size() > 1 && - UseDwarfRangesBaseAddressSpecifier) { - BaseIsSet = true; - // FIXME/use care: This may not be a useful base address if it's not - // the lowest address/range in this object. 
- Base = P.second.front()->getStart(); - Asm->OutStreamer->EmitIntValue(-1, Size); - Asm->OutStreamer->EmitSymbolValue(Base, Size); - } else if (BaseIsSet) { - BaseIsSet = false; - Asm->OutStreamer->EmitIntValue(-1, Size); - Asm->OutStreamer->EmitIntValue(0, Size); - } - - for (const auto *RS : P.second) { - const MCSymbol *Begin = RS->getStart(); - const MCSymbol *End = RS->getEnd(); - assert(Begin && "Range without a begin symbol?"); - assert(End && "Range without an end symbol?"); - if (Base) { - Asm->EmitLabelDifference(Begin, Base, Size); - Asm->EmitLabelDifference(End, Base, Size); - } else { - Asm->OutStreamer->EmitSymbolValue(Begin, Size); - Asm->OutStreamer->EmitSymbolValue(End, Size); - } - } - } - - // And terminate the list with two 0 values. - Asm->OutStreamer->EmitIntValue(0, Size); - Asm->OutStreamer->EmitIntValue(0, Size); - } + for (const RangeSpanList &List : TheCU->getRangeLists()) + emitRangeList(Asm, TheCU, List); } + + if (TableEnd) + Asm->OutStreamer->EmitLabel(TableEnd); } void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) { @@ -1963,20 +2185,17 @@ void DwarfDebug::emitMacro(DIMacro &M) { Asm->OutStreamer->EmitBytes(Name); if (!Value.empty()) { // There should be one space between macro name and macro value. - Asm->EmitInt8(' '); + Asm->emitInt8(' '); Asm->OutStreamer->EmitBytes(Value); } - Asm->EmitInt8('\0'); + Asm->emitInt8('\0'); } void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) { assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file); Asm->EmitULEB128(dwarf::DW_MACINFO_start_file); Asm->EmitULEB128(F.getLine()); - DIFile *File = F.getFile(); - unsigned FID = - U.getOrCreateSourceID(File->getFilename(), File->getDirectory()); - Asm->EmitULEB128(FID); + Asm->EmitULEB128(U.getOrCreateSourceID(F.getFile())); handleMacroNodes(F.getElements(), U); Asm->EmitULEB128(dwarf::DW_MACINFO_end_file); } @@ -1995,11 +2214,14 @@ void DwarfDebug::emitDebugMacinfo() { auto *SkCU = TheCU.getSkeleton(); DwarfCompileUnit &U = SkCU ? *SkCU : TheCU; auto *CUNode = cast<DICompileUnit>(P.first); - Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin()); - handleMacroNodes(CUNode->getMacros(), U); + DIMacroNodeArray Macros = CUNode->getMacros(); + if (!Macros.empty()) { + Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin()); + handleMacroNodes(Macros, U); + } } Asm->OutStreamer->AddComment("End Of Macro List Mark"); - Asm->EmitInt8(0); + Asm->emitInt8(0); } // DWARF5 Experimental Separate Dwarf emitters. @@ -2017,9 +2239,6 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, SkeletonHolder.addUnit(std::move(NewU)); } -// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list, -// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id, -// DW_AT_addr_base, DW_AT_ranges_base. 
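To make the new emitRangeList encoding concrete: when a section contributes more than one range to a list, one base address entry is emitted and the ranges follow as offset pairs; single-range sections fall back to start/length entries, and every list is terminated by an end-of-list entry. A simplified sketch under those assumptions only (the real code also honors the CU base address, the base-address-specifier flag, and the pre-v5 .debug_ranges encoding); section names and addresses below are made up:

#include <cstdint>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

// A resolved code range; Section stands in for the MCSection of the start
// symbol, Begin/End for the symbol addresses once the section is laid out.
struct Range {
  std::string Section;
  uint64_t Begin, End;
};

// Print the DWARF v5 range-list entry kinds that the grouping above would
// choose.  std::map is used for brevity; the real code keeps sections in
// insertion order with a MapVector.
void describeRangeList(const std::vector<Range> &Ranges) {
  std::map<std::string, std::vector<Range>> BySection;
  for (const Range &R : Ranges)
    BySection[R.Section].push_back(R);

  for (const auto &Group : BySection) {
    const std::vector<Range> &RS = Group.second;
    if (RS.size() > 1) {
      uint64_t Base = RS.front().Begin;
      std::printf("DW_RLE_base_address  0x%llx\n", (unsigned long long)Base);
      for (const Range &R : RS)
        std::printf("DW_RLE_offset_pair   +0x%llx, +0x%llx\n",
                    (unsigned long long)(R.Begin - Base),
                    (unsigned long long)(R.End - Base));
    } else {
      for (const Range &R : RS)
        std::printf("DW_RLE_start_length  0x%llx, 0x%llx\n",
                    (unsigned long long)R.Begin,
                    (unsigned long long)(R.End - R.Begin));
    }
  }
  std::printf("DW_RLE_end_of_list\n");
}

int main() {
  describeRangeList({{".text", 0x1000, 0x1040},
                     {".text", 0x1100, 0x1180},
                     {".text.cold", 0x2000, 0x2010}});
}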
DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>( @@ -2029,6 +2248,9 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { NewCU.initStmtList(); + if (useSegmentedStringOffsetsTable()) + NewCU.addStringOffsetsStart(); + initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit)); return NewCU; @@ -2051,26 +2273,37 @@ void DwarfDebug::emitDebugAbbrevDWO() { void DwarfDebug::emitDebugLineDWO() { assert(useSplitDwarf() && "No split dwarf?"); - Asm->OutStreamer->SwitchSection( + SplitTypeUnitFileTable.Emit( + *Asm->OutStreamer, MCDwarfLineTableParams(), Asm->getObjFileLowering().getDwarfLineDWOSection()); - SplitTypeUnitFileTable.Emit(*Asm->OutStreamer, MCDwarfLineTableParams()); +} + +void DwarfDebug::emitStringOffsetsTableHeaderDWO() { + assert(useSplitDwarf() && "No split dwarf?"); + InfoHolder.getStringPool().emitStringOffsetsTableHeader( + *Asm, Asm->getObjFileLowering().getDwarfStrOffDWOSection(), + InfoHolder.getStringOffsetsStartSym()); } // Emit the .debug_str.dwo section for separated dwarf. This contains the // string section and is identical in format to traditional .debug_str // sections. void DwarfDebug::emitDebugStrDWO() { + if (useSegmentedStringOffsetsTable()) + emitStringOffsetsTableHeaderDWO(); assert(useSplitDwarf() && "No split dwarf?"); MCSection *OffSec = Asm->getObjFileLowering().getDwarfStrOffDWOSection(); InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(), - OffSec); + OffSec, /* UseRelativeOffsets = */ false); } MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) { if (!useSplitDwarf()) return nullptr; - if (SingleCU) - SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode()->getDirectory()); + const DICompileUnit *DIUnit = CU.getCUNode(); + SplitTypeUnitFileTable.maybeSetRootFile( + DIUnit->getDirectory(), DIUnit->getFilename(), + CU.getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource()); return &SplitTypeUnitFileTable; } @@ -2119,10 +2352,16 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, if (useSplitDwarf()) NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesDWOSection()); else { - CU.applyStmtList(UnitDie); NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesSection(Signature)); + // Non-split type units reuse the compile unit's line table. + CU.applyStmtList(UnitDie); } + // Add DW_AT_str_offsets_base to the type unit DIE, but not for split type + // units. + if (useSegmentedStringOffsetsTable() && !useSplitDwarf()) + NewTU.addStringOffsetsStart(); + NewTU.setType(NewTU.createTypeDIE(CTy)); if (TopLevelType) { @@ -2157,32 +2396,50 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, CU.addDIETypeSignature(RefDie, Signature); } -// Accelerator table mutators - add each name along with its companion -// DIE to the proper table while ensuring that the name that we're going -// to reference is in the string table. We do this since the names we -// add may not only be identical to the names in the DIE. -void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) { - if (!useDwarfAccelTables()) +// Add the Name along with its companion DIE to the appropriate accelerator +// table (for AccelTableKind::Dwarf it's always AccelDebugNames, for +// AccelTableKind::Apple, we use the table we got as an argument). If +// accelerator tables are disabled, this function does nothing. 
+template <typename DataT> +void DwarfDebug::addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name, + const DIE &Die) { + if (getAccelTableKind() == AccelTableKind::None) return; - AccelNames.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); + + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + DwarfStringPoolEntryRef Ref = + Holder.getStringPool().getEntry(*Asm, Name); + + switch (getAccelTableKind()) { + case AccelTableKind::Apple: + AppleAccel.addName(Ref, Die); + break; + case AccelTableKind::Dwarf: + AccelDebugNames.addName(Ref, Die); + break; + case AccelTableKind::Default: + llvm_unreachable("Default should have already been resolved."); + case AccelTableKind::None: + llvm_unreachable("None handled above"); + } +} + +void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) { + addAccelNameImpl(AccelNames, Name, Die); } void DwarfDebug::addAccelObjC(StringRef Name, const DIE &Die) { - if (!useDwarfAccelTables()) - return; - AccelObjC.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); + // ObjC names go only into the Apple accelerator tables. + if (getAccelTableKind() == AccelTableKind::Apple) + addAccelNameImpl(AccelObjC, Name, Die); } void DwarfDebug::addAccelNamespace(StringRef Name, const DIE &Die) { - if (!useDwarfAccelTables()) - return; - AccelNamespace.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); + addAccelNameImpl(AccelNamespace, Name, Die); } void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) { - if (!useDwarfAccelTables()) - return; - AccelTypes.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); + addAccelNameImpl(AccelTypes, Name, Die); } uint16_t DwarfDebug::getDwarfVersion() const { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 2ae0b418a91e..0c7be5d27dfe 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -18,7 +18,6 @@ #include "DbgValueHistoryCalculator.h" #include "DebugHandlerBase.h" #include "DebugLocStream.h" -#include "DwarfAccelTable.h" #include "DwarfFile.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -31,6 +30,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AccelTable.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" @@ -192,6 +192,14 @@ struct SymbolCU { DwarfCompileUnit *CU; }; +/// The kind of accelerator tables we should emit. +enum class AccelTableKind { + Default, ///< Platform default. + None, ///< None. + Apple, ///< .apple_names, .apple_namespaces, .apple_types, .apple_objc. + Dwarf, ///< DWARF v5 .debug_names. +}; + /// Collects and handles dwarf debug information. class DwarfDebug : public DebugHandlerBase { /// All DIEValues are allocated through this allocator. @@ -255,12 +263,37 @@ class DwarfDebug : public DebugHandlerBase { /// Whether to emit all linkage names, or just abstract subprograms. bool UseAllLinkageNames; + /// Use inlined strings. + bool UseInlineStrings = false; + + /// Whether to emit DWARF pub sections or not. + bool UsePubSections = true; + + /// Allow emission of .debug_ranges section. + bool UseRangesSection = true; + + /// True if the sections itself must be used as references and don't create + /// temp symbols inside DWARF sections. 
+ bool UseSectionsAsReferences = false; + + ///Allow emission of the .debug_loc section. + bool UseLocSection = true; + + /// Generate DWARF v4 type units. + bool GenerateTypeUnits; + /// DWARF5 Experimental Options /// @{ - bool HasDwarfAccelTables; + AccelTableKind TheAccelTableKind; bool HasAppleExtensionAttributes; bool HasSplitDwarf; + /// Whether to generate the DWARF v5 string offsets table. + /// It consists of a series of contributions, each preceded by a header. + /// The pre-DWARF v5 string offsets table for split dwarf is, in contrast, + /// a monolithic sequence of string offsets. + bool UseSegmentedStringOffsetsTable; + /// Separated Dwarf Variables /// In general these will all be for bits that are left in the /// original object file, rather than things that are meant @@ -283,10 +316,12 @@ class DwarfDebug : public DebugHandlerBase { AddressPool AddrPool; - DwarfAccelTable AccelNames; - DwarfAccelTable AccelObjC; - DwarfAccelTable AccelNamespace; - DwarfAccelTable AccelTypes; + /// Accelerator tables. + AccelTable<DWARF5AccelTableData> AccelDebugNames; + AccelTable<AppleAccelTableOffsetData> AccelNames; + AccelTable<AppleAccelTableOffsetData> AccelObjC; + AccelTable<AppleAccelTableOffsetData> AccelNamespace; + AccelTable<AppleAccelTableTypeData> AccelTypes; // Identify a debugger for "tuning" the debug info. DebuggerKind DebuggerTuning = DebuggerKind::Default; @@ -299,9 +334,9 @@ class DwarfDebug : public DebugHandlerBase { using InlinedVariable = DbgValueHistoryMap::InlinedVariable; - void ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable Var, + void ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable IV, const MDNode *Scope); - void ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, InlinedVariable Var, + void ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, InlinedVariable IV, const MDNode *Scope); DbgVariable *createConcreteVariable(DwarfCompileUnit &TheCU, @@ -310,6 +345,10 @@ class DwarfDebug : public DebugHandlerBase { /// Construct a DIE for this abstract scope. void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope); + template <typename DataT> + void addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name, + const DIE &Die); + void finishVariableDefinitions(); void finishSubprogramDefinitions(); @@ -324,9 +363,15 @@ class DwarfDebug : public DebugHandlerBase { /// Emit the abbreviation section. void emitAbbreviations(); + /// Emit the string offsets table header. + void emitStringOffsetsTableHeader(); + /// Emit a specified accelerator table. - void emitAccel(DwarfAccelTable &Accel, MCSection *Section, - StringRef TableName); + template <typename AccelTableT> + void emitAccel(AccelTableT &Accel, MCSection *Section, StringRef TableName); + + /// Emit DWARF v5 accelerator table. + void emitAccelDebugNames(); /// Emit visible names into a hashed accelerator table section. void emitAccelNames(); @@ -363,6 +408,9 @@ class DwarfDebug : public DebugHandlerBase { /// Emit address ranges into a debug ranges section. void emitDebugRanges(); + /// Emit range lists into a DWARF v5 debug rnglists section. + void emitDebugRnglists(); + /// Emit macros into a debug macinfo section. void emitDebugMacinfo(); void emitMacro(DIMacro &M); @@ -375,8 +423,13 @@ class DwarfDebug : public DebugHandlerBase { void initSkeletonUnit(const DwarfUnit &U, DIE &Die, std::unique_ptr<DwarfCompileUnit> NewU); - /// Construct the split debug info compile unit for the debug info - /// section. 
+ /// Construct the split debug info compile unit for the debug info section. + /// In DWARF v5, the skeleton unit DIE may have the following attributes: + /// DW_AT_addr_base, DW_AT_comp_dir, DW_AT_dwo_name, DW_AT_high_pc, + /// DW_AT_low_pc, DW_AT_ranges, DW_AT_stmt_list, and DW_AT_str_offsets_base. + /// Prior to DWARF v5 it may also have DW_AT_GNU_dwo_id. DW_AT_GNU_dwo_name + /// is used instead of DW_AT_dwo_name, Dw_AT_GNU_addr_base instead of + /// DW_AT_addr_base, and DW_AT_GNU_ranges_base instead of DW_AT_rnglists_base. DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU); /// Emit the debug info dwo section. @@ -388,6 +441,9 @@ class DwarfDebug : public DebugHandlerBase { /// Emit the debug line dwo section. void emitDebugLineDWO(); + /// Emit the dwo stringoffsets table header. + void emitStringOffsetsTableHeaderDWO(); + /// Emit the debug str dwo section. void emitDebugStrDWO(); @@ -422,6 +478,9 @@ class DwarfDebug : public DebugHandlerBase { void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU, DenseSet<InlinedVariable> &P); + /// Emit the reference to the section. + void emitSectionReference(const DwarfCompileUnit &CU); + protected: /// Gather pre-function debug information. void beginFunctionImpl(const MachineFunction *MF) override; @@ -478,11 +537,30 @@ public: /// DWARF4 format. bool useDWARF2Bitfields() const { return UseDWARF2Bitfields; } + /// Returns whether to use inline strings. + bool useInlineStrings() const { return UseInlineStrings; } + + /// Returns whether GNU pub sections should be emitted. + bool usePubSections() const { return UsePubSections; } + + /// Returns whether ranges section should be emitted. + bool useRangesSection() const { return UseRangesSection; } + + /// Returns whether to use sections as labels rather than temp symbols. + bool useSectionsAsReferences() const { + return UseSectionsAsReferences; + } + + /// Returns whether .debug_loc section should be emitted. + bool useLocSection() const { return UseLocSection; } + + /// Returns whether to generate DWARF v4 type units. + bool generateTypeUnits() const { return GenerateTypeUnits; } + // Experimental DWARF5 features. - /// Returns whether or not to emit tables that dwarf consumers can - /// use to accelerate lookup. - bool useDwarfAccelTables() const { return HasDwarfAccelTables; } + /// Returns what kind (if any) of accelerator tables to emit. + AccelTableKind getAccelTableKind() const { return TheAccelTableKind; } bool useAppleExtensionAttributes() const { return HasAppleExtensionAttributes; @@ -492,6 +570,16 @@ public: /// split dwarf proposal support. bool useSplitDwarf() const { return HasSplitDwarf; } + /// Returns whether to generate a string offsets table with (possibly shared) + /// contributions from each CU and type unit. This implies the use of + /// DW_FORM_strx* indirect references with DWARF v5 and beyond. Note that + /// DW_FORM_GNU_str_index is also an indirect reference, but it is used with + /// a pre-DWARF v5 implementation of split DWARF sections, which uses a + /// monolithic string offsets table. + bool useSegmentedStringOffsetsTable() const { + return UseSegmentedStringOffsetsTable; + } + bool shareAcrossDWOCUs() const; /// Returns the Dwarf Version. @@ -537,6 +625,9 @@ public: /// Find the matching DwarfCompileUnit for the given CU DIE. 
DwarfCompileUnit *lookupCU(const DIE *Die) { return CUDieMap.lookup(Die); } + const DwarfCompileUnit *lookupCU(const DIE *Die) const { + return CUDieMap.lookup(Die); + } /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger. /// diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index 80d5bd208ed8..b57ea8fc6322 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -70,7 +70,7 @@ public: }; class LLVM_LIBRARY_VISIBILITY ARMException : public DwarfCFIExceptionBase { - void emitTypeInfos(unsigned TTypeEncoding) override; + void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) override; ARMTargetStreamer &getTargetStreamer(); public: diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 68d25fe37b43..d8d1a5e8f841 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -123,7 +123,10 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(MachineReg); unsigned RegSize = TRI.getRegSizeInBits(*RC); // Keep track of the bits in the register we already emitted, so we - // can avoid emitting redundant aliasing subregs. + // can avoid emitting redundant aliasing subregs. Because this is + // just doing a greedy scan of all subregisters, it is possible that + // this doesn't find a combination of subregisters that fully cover + // the register (even though one may exist). SmallBitVector Coverage(RegSize, false); for (MCSubRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) { unsigned Idx = TRI.getSubRegIndex(MachineReg, *SR); @@ -143,7 +146,7 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, if (CurSubReg.test(Coverage)) { // Emit a piece for any gap in the coverage. if (Offset > CurPos) - DwarfRegs.push_back({-1, Offset - CurPos, nullptr}); + DwarfRegs.push_back({-1, Offset - CurPos, "no DWARF register encoding"}); DwarfRegs.push_back( {Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"}); if (Offset >= MaxSize) @@ -154,8 +157,13 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, CurPos = Offset + Size; } } - - return CurPos; + // Failed to find any DWARF encoding. + if (CurPos == 0) + return false; + // Found a partial or complete DWARF encoding. + if (CurPos < RegSize) + DwarfRegs.push_back({-1, RegSize - CurPos, "no DWARF register encoding"}); + return true; } void DwarfExpression::addStackValue() { @@ -341,11 +349,22 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, case dwarf::DW_OP_plus: case dwarf::DW_OP_minus: case dwarf::DW_OP_mul: + case dwarf::DW_OP_div: + case dwarf::DW_OP_mod: + case dwarf::DW_OP_or: + case dwarf::DW_OP_and: + case dwarf::DW_OP_xor: + case dwarf::DW_OP_shl: + case dwarf::DW_OP_shr: + case dwarf::DW_OP_shra: + case dwarf::DW_OP_lit0: + case dwarf::DW_OP_not: + case dwarf::DW_OP_dup: emitOp(Op->getOp()); break; case dwarf::DW_OP_deref: assert(LocationKind != Register); - if (LocationKind != Memory && isMemoryLocation(ExprCursor)) + if (LocationKind != Memory && ::isMemoryLocation(ExprCursor)) // Turning this into a memory location description makes the deref // implicit. 
LocationKind = Memory; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index ea5cbc40ba35..952b0d99a95a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -211,6 +211,9 @@ public: /// Emit an unsigned constant. void addUnsignedConstant(const APInt &Value); + bool isMemoryLocation() const { return LocationKind == Memory; } + bool isUnknownLocation() const { return LocationKind == Unknown; } + /// Lock this down to become a memory location description. void setMemoryLocationKind() { assert(LocationKind == Unknown); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 3c04c969192d..c90bd568162d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -77,42 +77,24 @@ unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) { void DwarfFile::emitAbbrevs(MCSection *Section) { Abbrevs.Emit(Asm, Section); } // Emit strings into a string section. -void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection) { - StrPool.emit(*Asm, StrSection, OffsetSection); +void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection, + bool UseRelativeOffsets) { + StrPool.emit(*Asm, StrSection, OffsetSection, UseRelativeOffsets); } bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { - SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS]; + auto &ScopeVars = ScopeVariables[LS]; const DILocalVariable *DV = Var->getVariable(); - // Variables with positive arg numbers are parameters. if (unsigned ArgNum = DV->getArg()) { - // Keep all parameters in order at the start of the variable list to ensure - // function types are correct (no out-of-order parameters) - // - // This could be improved by only doing it for optimized builds (unoptimized - // builds have the right order to begin with), searching from the back (this - // would catch the unoptimized case quickly), or doing a binary search - // rather than linear search. - auto I = Vars.begin(); - while (I != Vars.end()) { - unsigned CurNum = (*I)->getVariable()->getArg(); - // A local (non-parameter) variable has been found, insert immediately - // before it. - if (CurNum == 0) - break; - // A later indexed parameter has been found, insert immediately before it. - if (CurNum > ArgNum) - break; - if (CurNum == ArgNum) { - (*I)->addMMIEntry(*Var); - return false; - } - ++I; + auto Cached = ScopeVars.Args.find(ArgNum); + if (Cached == ScopeVars.Args.end()) + ScopeVars.Args[ArgNum] = Var; + else { + Cached->second->addMMIEntry(*Var); + return false; } - Vars.insert(I, Var); - return true; - } - - Vars.push_back(Var); + } else { + ScopeVars.Locals.push_back(Var); + } return true; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h index 167ca13c19c1..8dfbc4e1c434 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -17,6 +17,7 @@ #include "llvm/CodeGen/DIE.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Allocator.h" +#include <map> #include <memory> #include <utility> @@ -43,8 +44,23 @@ class DwarfFile { DwarfStringPool StrPool; - // Collection of dbg variables of a scope. 
- DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> ScopeVariables; + /// DWARF v5: The symbol that designates the start of the contribution to + /// the string offsets table. The contribution is shared by all units. + MCSymbol *StringOffsetsStartSym = nullptr; + + /// DWARF v5: The symbol that designates the base of the range list table. + /// The table is shared by all units. + MCSymbol *RnglistsTableBaseSym = nullptr; + + /// The variables of a lexical scope. + struct ScopeVars { + /// We need to sort Args by ArgNo and check for duplicates. This could also + /// be implemented as a list or vector + std::lower_bound(). + std::map<unsigned, DbgVariable *> Args; + SmallVector<DbgVariable *, 8> Locals; + }; + /// Collection of DbgVariables of each lexical scope. + DenseMap<LexicalScope *, ScopeVars> ScopeVariables; // Collection of abstract subprogram DIEs. DenseMap<const MDNode *, DIE *> AbstractSPDies; @@ -62,39 +78,51 @@ public: return CUs; } - /// \brief Compute the size and offset of a DIE given an incoming Offset. + /// Compute the size and offset of a DIE given an incoming Offset. unsigned computeSizeAndOffset(DIE &Die, unsigned Offset); - /// \brief Compute the size and offset of all the DIEs. + /// Compute the size and offset of all the DIEs. void computeSizeAndOffsets(); - /// \brief Compute the size and offset of all the DIEs in the given unit. + /// Compute the size and offset of all the DIEs in the given unit. /// \returns The size of the root DIE. unsigned computeSizeAndOffsetsForUnit(DwarfUnit *TheU); - /// \brief Add a unit to the list of CUs. + /// Add a unit to the list of CUs. void addUnit(std::unique_ptr<DwarfCompileUnit> U); - /// \brief Emit all of the units to the section listed with the given + /// Emit all of the units to the section listed with the given /// abbreviation section. void emitUnits(bool UseOffsets); - /// \brief Emit the given unit to its section. + /// Emit the given unit to its section. void emitUnit(DwarfUnit *U, bool UseOffsets); - /// \brief Emit a set of abbreviations to the specific section. + /// Emit a set of abbreviations to the specific section. void emitAbbrevs(MCSection *); - /// \brief Emit all of the strings to the section given. - void emitStrings(MCSection *StrSection, MCSection *OffsetSection = nullptr); + /// Emit all of the strings to the section given. If OffsetSection is + /// non-null, emit a table of string offsets to it. If UseRelativeOffsets + /// is false, emit absolute offsets to the strings. Otherwise, emit + /// relocatable references to the strings if they are supported by the target. + void emitStrings(MCSection *StrSection, MCSection *OffsetSection = nullptr, + bool UseRelativeOffsets = false); - /// \brief Returns the string pool. + /// Returns the string pool. DwarfStringPool &getStringPool() { return StrPool; } + MCSymbol *getStringOffsetsStartSym() const { return StringOffsetsStartSym; } + + void setStringOffsetsStartSym(MCSymbol *Sym) { StringOffsetsStartSym = Sym; } + + MCSymbol *getRnglistsTableBaseSym() const { return RnglistsTableBaseSym; } + + void setRnglistsTableBaseSym(MCSymbol *Sym) { RnglistsTableBaseSym = Sym; } + /// \returns false if the variable was merged with a previous one. 
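As an illustration of the ScopeVars layout introduced above (parameters keyed by argument number so they stay ordered and merge on duplicates, other locals kept in insertion order), here is a self-contained sketch; the type and function names are invented and only mirror the shape of DwarfFile::addScopeVariable.

    // Illustrative only: the Args/Locals split used by ScopeVars.
    #include <map>
    #include <utility>
    #include <vector>

    struct FakeVar { unsigned ArgNo; /* 0 means "not a parameter" */ };

    struct ScopeVarsSketch {
      std::map<unsigned, FakeVar *> Args; // sorted by ArgNo, one per slot
      std::vector<FakeVar *> Locals;      // insertion order preserved
    };

    // Returns false when the variable merges into an entry already recorded
    // for the same argument slot (the patch calls DbgVariable::addMMIEntry).
    static bool addScopeVarSketch(ScopeVarsSketch &SV, FakeVar *V) {
      if (V->ArgNo == 0) {
        SV.Locals.push_back(V);
        return true;
      }
      return SV.Args.insert(std::make_pair(V->ArgNo, V)).second;
    }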
bool addScopeVariable(LexicalScope *LS, DbgVariable *Var); - DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> &getScopeVariables() { + DenseMap<LexicalScope *, ScopeVars> &getScopeVariables() { return ScopeVariables; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index aa5f01e88933..a61fa83cfb03 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -39,8 +39,30 @@ DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm, return EntryRef(*I.first); } +void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm, + MCSection *Section, + MCSymbol *StartSym) { + if (empty()) + return; + Asm.OutStreamer->SwitchSection(Section); + unsigned EntrySize = 4; + // FIXME: DWARF64 + // We are emitting the header for a contribution to the string offsets + // table. The header consists of an entry with the contribution's + // size (not including the size of the length field), the DWARF version and + // 2 bytes of padding. + Asm.emitInt32(size() * EntrySize + 4); + Asm.emitInt16(Asm.getDwarfVersion()); + Asm.emitInt16(0); + // Define the symbol that marks the start of the contribution. It is + // referenced by most unit headers via DW_AT_str_offsets_base. + // Split units do not use the attribute. + if (StartSym) + Asm.OutStreamer->EmitLabel(StartSym); +} + void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, - MCSection *OffsetSection) { + MCSection *OffsetSection, bool UseRelativeOffsets) { if (Pool.empty()) return; @@ -74,6 +96,9 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, Asm.OutStreamer->SwitchSection(OffsetSection); unsigned size = 4; // FIXME: DWARF64 is 8. for (const auto &Entry : Entries) - Asm.OutStreamer->EmitIntValue(Entry->getValue().Offset, size); + if (UseRelativeOffsets) + Asm.emitDwarfStringOffset(Entry->getValue()); + else + Asm.OutStreamer->EmitIntValue(Entry->getValue().Offset, size); } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h index 1cac3b7c8432..6e6988ea4ad4 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h @@ -19,6 +19,7 @@ namespace llvm { class AsmPrinter; class MCSection; +class MCSymbol; // Collection of strings for this unit and assorted symbols. // A String->Symbol mapping of strings used by indirect @@ -36,11 +37,17 @@ public: DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix); + void emitStringOffsetsTableHeader(AsmPrinter &Asm, MCSection *OffsetSection, + MCSymbol *StartSym); + void emit(AsmPrinter &Asm, MCSection *StrSection, - MCSection *OffsetSection = nullptr); + MCSection *OffsetSection = nullptr, + bool UseRelativeOffsets = false); bool empty() const { return Pool.empty(); } + unsigned size() const { return Pool.size(); } + /// Get a reference to an entry in the string pool. 
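For the contribution header emitted by emitStringOffsetsTableHeader in the DwarfStringPool hunk above, the unit-length field excludes the 4-byte length word itself but counts the 2-byte version, the 2 bytes of padding, and the offset entries. A minimal sketch of that arithmetic, under the same assumption as the FIXME in the patch (DWARF32, 4-byte entries):

    // Illustration only: length field of a DWARF v5 .debug_str_offsets
    // contribution holding NumEntries 4-byte offsets (DWARF32 format).
    #include <cstdint>

    constexpr uint32_t strOffsetsContributionLength(uint32_t NumEntries) {
      const uint32_t EntrySize = 4;     // DWARF64 would use 8-byte entries
      const uint32_t VersionAndPad = 4; // 2-byte version + 2 bytes of padding
      return NumEntries * EntrySize + VersionAndPad;
    }

    static_assert(strOffsetsContributionLength(3) == 16,
                  "3 offsets -> 12 bytes of entries + 4 bytes of header tail");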
EntryRef getEntry(AsmPrinter &Asm, StringRef Str); }; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 4ea59f504bd4..43b835b2c4aa 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -19,10 +19,10 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" @@ -30,12 +30,14 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Metadata.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include <cassert> #include <cstdint> #include <string> @@ -45,11 +47,6 @@ using namespace llvm; #define DEBUG_TYPE "dwarfdebug" -static cl::opt<bool> -GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, - cl::desc("Generate DWARF4 type units."), - cl::init(false)); - DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE) : DwarfExpression(AP.getDwarfVersion()), AP(AP), DU(DU), @@ -83,8 +80,6 @@ DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, MCDwarfDwoLineTable *SplitLineTable) : DwarfUnit(dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), CU(CU), SplitLineTable(SplitLineTable) { - if (SplitLineTable) - addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0); } DwarfUnit::~DwarfUnit() { @@ -185,7 +180,7 @@ bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const { return false; return (isa<DIType>(D) || (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) && - !GenerateDwarfTypeUnits; + !DD->generateTypeUnits(); } DIE *DwarfUnit::getDIE(const DINode *D) const { @@ -239,9 +234,28 @@ void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form, void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute, StringRef String) { - Die.addValue(DIEValueAllocator, Attribute, - isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp, - DIEString(DU->getStringPool().getEntry(*Asm, String))); + if (DD->useInlineStrings()) { + Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_string, + new (DIEValueAllocator) + DIEInlineString(String, DIEValueAllocator)); + return; + } + auto StringPoolEntry = DU->getStringPool().getEntry(*Asm, String); + dwarf::Form IxForm = + isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp; + // For DWARF v5 and beyond, use the smallest strx? form possible. 
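The "smallest strx? form possible" rule named in the comment above comes down to three thresholds on the string-pool index; the hunk that follows applies them inside DwarfUnit::addString. Shown standalone for clarity (a sketch only, assuming the DW_FORM_strx1..strx4 constants from llvm/BinaryFormat/Dwarf.h):

    // Illustration only: pick the narrowest DW_FORM_strx* form that can hold
    // a given string-pool index (1, 2, 3 or 4 bytes).
    #include "llvm/BinaryFormat/Dwarf.h"

    static llvm::dwarf::Form smallestStrxForm(unsigned Index) {
      using namespace llvm::dwarf;
      if (Index > 0xffffff) return DW_FORM_strx4;
      if (Index > 0xffff)   return DW_FORM_strx3;
      if (Index > 0xff)     return DW_FORM_strx2;
      return DW_FORM_strx1;
    }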
+ if (useSegmentedStringOffsetsTable()) { + IxForm = dwarf::DW_FORM_strx1; + unsigned Index = StringPoolEntry.getIndex(); + if (Index > 0xffffff) + IxForm = dwarf::DW_FORM_strx4; + else if (Index > 0xffff) + IxForm = dwarf::DW_FORM_strx3; + else if (Index > 0xff) + IxForm = dwarf::DW_FORM_strx2; + } + Die.addValue(DIEValueAllocator, Attribute, IxForm, + DIEString(StringPoolEntry)); } DIEValueList::value_iterator DwarfUnit::addLabel(DIEValueList &Die, @@ -263,9 +277,33 @@ void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute, addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer); } -unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) { - return SplitLineTable ? SplitLineTable->getFile(DirName, FileName) - : getCU().getOrCreateSourceID(FileName, DirName); +MD5::MD5Result *DwarfUnit::getMD5AsBytes(const DIFile *File) const { + assert(File); + if (DD->getDwarfVersion() < 5) + return nullptr; + Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum(); + if (!Checksum || Checksum->Kind != DIFile::CSK_MD5) + return nullptr; + + // Convert the string checksum to an MD5Result for the streamer. + // The verifier validates the checksum so we assume it's okay. + // An MD5 checksum is 16 bytes. + std::string ChecksumString = fromHex(Checksum->Value); + void *CKMem = Asm->OutStreamer->getContext().allocate(16, 1); + memcpy(CKMem, ChecksumString.data(), 16); + return reinterpret_cast<MD5::MD5Result *>(CKMem); +} + +unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) { + if (!SplitLineTable) + return getCU().getOrCreateSourceID(File); + if (!UsedLineTable) { + UsedLineTable = true; + // This is a split type unit that needs a line table. + addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0); + } + return SplitLineTable->getFile(File->getDirectory(), File->getFilename(), + getMD5AsBytes(File), File->getSource()); } void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { @@ -335,12 +373,11 @@ void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, Die.addValue(DIEValueAllocator, Attribute, Block->BestForm(), Block); } -void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File, - StringRef Directory) { +void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) { if (Line == 0) return; - unsigned FileID = getOrCreateSourceID(File, Directory); + unsigned FileID = getOrCreateSourceID(File); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); addUInt(Die, dwarf::DW_AT_decl_line, None, Line); @@ -349,32 +386,31 @@ void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File, void DwarfUnit::addSourceLine(DIE &Die, const DILocalVariable *V) { assert(V); - addSourceLine(Die, V->getLine(), V->getScope()->getFilename(), - V->getScope()->getDirectory()); + addSourceLine(Die, V->getLine(), V->getFile()); } void DwarfUnit::addSourceLine(DIE &Die, const DIGlobalVariable *G) { assert(G); - addSourceLine(Die, G->getLine(), G->getFilename(), G->getDirectory()); + addSourceLine(Die, G->getLine(), G->getFile()); } void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) { assert(SP); - addSourceLine(Die, SP->getLine(), SP->getFilename(), SP->getDirectory()); + addSourceLine(Die, SP->getLine(), SP->getFile()); } void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) { assert(Ty); - addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); + addSourceLine(Die, Ty->getLine(), Ty->getFile()); } void DwarfUnit::addSourceLine(DIE &Die, 
const DIObjCProperty *Ty) { assert(Ty); - addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); + addSourceLine(Die, Ty->getLine(), Ty->getFile()); } /* Byref variables, in Blocks, are declared by the programmer as "SomeType @@ -727,7 +763,7 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { else if (auto *STy = dyn_cast<DISubroutineType>(Ty)) constructTypeDIE(TyDIE, STy); else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { - if (GenerateDwarfTypeUnits && !Ty->isForwardDecl()) + if (DD->generateTypeUnits() && !Ty->isForwardDecl()) if (MDString *TypeId = CTy->getRawIdentifier()) { DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); // Skip updating the accelerator tables since this is not the full type. @@ -917,9 +953,24 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { case dwarf::DW_TAG_enumeration_type: constructEnumTypeDIE(Buffer, CTy); break; + case dwarf::DW_TAG_variant_part: case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_union_type: case dwarf::DW_TAG_class_type: { + // Emit the discriminator for a variant part. + DIDerivedType *Discriminator = nullptr; + if (Tag == dwarf::DW_TAG_variant_part) { + Discriminator = CTy->getDiscriminator(); + if (Discriminator) { + // DWARF says: + // If the variant part has a discriminant, the discriminant is + // represented by a separate debugging information entry which is + // a child of the variant part entry. + DIE &DiscMember = constructMemberDIE(Buffer, Discriminator); + addDIEEntry(Buffer, dwarf::DW_AT_discr, DiscMember); + } + } + // Add elements to structure type. DINodeArray Elements = CTy->getElements(); for (const auto *Element : Elements) { @@ -933,6 +984,18 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { addType(ElemDie, resolve(DDTy->getBaseType()), dwarf::DW_AT_friend); } else if (DDTy->isStaticMember()) { getOrCreateStaticMemberDIE(DDTy); + } else if (Tag == dwarf::DW_TAG_variant_part) { + // When emitting a variant part, wrap each member in + // DW_TAG_variant. + DIE &Variant = createAndAddDIE(dwarf::DW_TAG_variant, Buffer); + if (const ConstantInt *CI = + dyn_cast_or_null<ConstantInt>(DDTy->getDiscriminantValue())) { + if (isUnsignedDIType(DD, resolve(Discriminator->getBaseType()))) + addUInt(Variant, dwarf::DW_AT_discr_value, None, CI->getZExtValue()); + else + addSInt(Variant, dwarf::DW_AT_discr_value, None, CI->getSExtValue()); + } + constructMemberDIE(Variant, DDTy); } else { constructMemberDIE(Buffer, DDTy); } @@ -952,6 +1015,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { if (unsigned PropertyAttributes = Property->getAttributes()) addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None, PropertyAttributes); + } else if (auto *Composite = dyn_cast<DICompositeType>(Element)) { + if (Composite->getTag() == dwarf::DW_TAG_variant_part) { + DIE &VariantPart = createAndAddDIE(Composite->getTag(), Buffer); + constructTypeDIE(VariantPart, Composite); + } } } @@ -975,6 +1043,15 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) addTemplateParams(Buffer, CTy->getTemplateParams()); + // Add the type's non-standard calling convention. 
+ uint8_t CC = 0; + if (CTy->isTypePassByValue()) + CC = dwarf::DW_CC_pass_by_value; + else if (CTy->isTypePassByReference()) + CC = dwarf::DW_CC_pass_by_reference; + if (CC) + addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1, + CC); break; } default: @@ -1152,9 +1229,8 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, // Look at the Decl's linkage name only if we emitted it. if (DD->useAllLinkageNames()) DeclLinkageName = SPDecl->getLinkageName(); - unsigned DeclID = - getOrCreateSourceID(SPDecl->getFilename(), SPDecl->getDirectory()); - unsigned DefID = getOrCreateSourceID(SP->getFilename(), SP->getDirectory()); + unsigned DeclID = getOrCreateSourceID(SPDecl->getFile()); + unsigned DefID = getOrCreateSourceID(SP->getFile()); if (DeclID != DefID) addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID); @@ -1304,14 +1380,17 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, // DW_AT_lower_bound and DW_AT_count attributes. int64_t LowerBound = SR->getLowerBound(); int64_t DefaultLowerBound = getDefaultLowerBound(); - int64_t Count = SR->getCount(); + int64_t Count = -1; + if (auto *CI = SR->getCount().dyn_cast<ConstantInt*>()) + Count = CI->getSExtValue(); if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound) addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound); - if (Count != -1) - // FIXME: An unbounded array should reference the expression that defines - // the array. + if (auto *CV = SR->getCount().dyn_cast<DIVariable*>()) { + if (auto *CountVarDIE = getDIE(CV)) + addDIEEntry(DW_Subrange, dwarf::DW_AT_count, *CountVarDIE); + } else if (Count != -1) addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count); } @@ -1320,16 +1399,49 @@ DIE *DwarfUnit::getIndexTyDie() { return IndexTyDie; // Construct an integer type to use for indexes. IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, getUnitDie()); - addString(*IndexTyDie, dwarf::DW_AT_name, "sizetype"); + StringRef Name = "__ARRAY_SIZE_TYPE__"; + addString(*IndexTyDie, dwarf::DW_AT_name, Name); addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t)); addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, dwarf::DW_ATE_unsigned); + DD->addAccelType(Name, *IndexTyDie, /*Flags*/ 0); return IndexTyDie; } +/// Returns true if the vector's size differs from the sum of sizes of elements +/// the user specified. This can occur if the vector has been rounded up to +/// fit memory alignment constraints. +static bool hasVectorBeenPadded(const DICompositeType *CTy) { + assert(CTy && CTy->isVector() && "Composite type is not a vector"); + const uint64_t ActualSize = CTy->getSizeInBits(); + + // Obtain the size of each element in the vector. + DIType *BaseTy = CTy->getBaseType().resolve(); + assert(BaseTy && "Unknown vector element type."); + const uint64_t ElementSize = BaseTy->getSizeInBits(); + + // Locate the number of elements in the vector. + const DINodeArray Elements = CTy->getElements(); + assert(Elements.size() == 1 && + Elements[0]->getTag() == dwarf::DW_TAG_subrange_type && + "Invalid vector element array, expected one element of type subrange"); + const auto Subrange = cast<DISubrange>(Elements[0]); + const auto CI = Subrange->getCount().get<ConstantInt *>(); + const int32_t NumVecElements = CI->getSExtValue(); + + // Ensure we found the element count and that the actual size is wide + // enough to contain the requested size. 
+ assert(ActualSize >= (NumVecElements * ElementSize) && "Invalid vector size"); + return ActualSize != (NumVecElements * ElementSize); +} + void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) { - if (CTy->isVector()) + if (CTy->isVector()) { addFlag(Buffer, dwarf::DW_AT_GNU_vector); + if (hasVectorBeenPadded(CTy)) + addUInt(Buffer, dwarf::DW_AT_byte_size, None, + CTy->getSizeInBits() / CHAR_BIT); + } // Emit the element type. addType(Buffer, resolve(CTy->getBaseType())); @@ -1350,6 +1462,15 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) { } void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) { + const DIType *DTy = resolve(CTy->getBaseType()); + bool IsUnsigned = DTy && isUnsignedDIType(DD, DTy); + if (DTy) { + if (DD->getDwarfVersion() >= 3) + addType(Buffer, DTy); + if (DD->getDwarfVersion() >= 4 && (CTy->getFlags() & DINode::FlagFixedEnum)) + addFlag(Buffer, dwarf::DW_AT_enum_class); + } + DINodeArray Elements = CTy->getElements(); // Add enumerators to enumeration type. @@ -1359,16 +1480,10 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) { DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer); StringRef Name = Enum->getName(); addString(Enumerator, dwarf::DW_AT_name, Name); - int64_t Value = Enum->getValue(); - addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, - Value); + auto Value = static_cast<uint64_t>(Enum->getValue()); + addConstantValue(Enumerator, IsUnsigned, Value); } } - const DIType *DTy = resolve(CTy->getBaseType()); - if (DTy) { - addType(Buffer, DTy); - addFlag(Buffer, dwarf::DW_AT_enum_class); - } } void DwarfUnit::constructContainingTypeDIEs() { @@ -1385,7 +1500,7 @@ void DwarfUnit::constructContainingTypeDIEs() { } } -void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { +DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { DIE &MemberDie = createAndAddDIE(DT->getTag(), Buffer); StringRef Name = DT->getName(); if (!Name.empty()) @@ -1490,6 +1605,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { if (DT->isArtificial()) addFlag(MemberDie, dwarf::DW_AT_artificial); + + return MemberDie; } DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) { @@ -1542,18 +1659,18 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) { void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) { // Emit size of content not including length itself Asm->OutStreamer->AddComment("Length of Unit"); - Asm->EmitInt32(getHeaderSize() + getUnitDie().getSize()); + Asm->emitInt32(getHeaderSize() + getUnitDie().getSize()); Asm->OutStreamer->AddComment("DWARF version number"); unsigned Version = DD->getDwarfVersion(); - Asm->EmitInt16(Version); + Asm->emitInt16(Version); // DWARF v5 reorders the address size and adds a unit type. if (Version >= 5) { Asm->OutStreamer->AddComment("DWARF Unit Type"); - Asm->EmitInt8(UT); + Asm->emitInt8(UT); Asm->OutStreamer->AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->MAI->getCodePointerSize()); + Asm->emitInt8(Asm->MAI->getCodePointerSize()); } // We share one abbreviations table across all units so it's always at the @@ -1562,14 +1679,14 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) { Asm->OutStreamer->AddComment("Offset Into Abbrev. 
Section"); const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); if (UseOffsets) - Asm->EmitInt32(0); + Asm->emitInt32(0); else Asm->emitDwarfSymbolReference( TLOF.getDwarfAbbrevSection()->getBeginSymbol(), false); if (Version <= 4) { Asm->OutStreamer->AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->MAI->getCodePointerSize()); + Asm->emitInt8(Asm->MAI->getCodePointerSize()); } } @@ -1628,3 +1745,19 @@ const MCSymbol *DwarfUnit::getCrossSectionRelativeBaseAddress() const { return nullptr; return getSection()->getBeginSymbol(); } + +void DwarfUnit::addStringOffsetsStart() { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + addSectionLabel(getUnitDie(), dwarf::DW_AT_str_offsets_base, + DU->getStringOffsetsStartSym(), + TLOF.getDwarfStrOffSection()->getBeginSymbol()); +} + +void DwarfUnit::addRnglistsBase() { + assert(DD->getDwarfVersion() >= 5 && + "DW_AT_rnglists_base requires DWARF version 5 or later"); + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + addSectionLabel(getUnitDie(), dwarf::DW_AT_rnglists_base, + DU->getRnglistsTableBaseSym(), + TLOF.getDwarfRnglistsSection()->getBeginSymbol()); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 4cc01b3298d4..69696f626536 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -98,7 +98,7 @@ protected: /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, const DINode *> ContainingTypeMap; - DwarfUnit(dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A, DwarfDebug *DW, + DwarfUnit(dwarf::Tag, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie); @@ -112,6 +112,8 @@ public: uint16_t getLanguage() const { return CUNode->getSourceLanguage(); } const DICompileUnit *getCUNode() const { return CUNode; } + uint16_t getDwarfVersion() const { return DD->getDwarfVersion(); } + /// Return true if this compile unit has something to write out. bool hasContent() const { return getUnitDie().hasChildren(); } @@ -185,7 +187,7 @@ public: /// Add a dwarf op address data and value using the form given and an /// op of either DW_FORM_addr or DW_FORM_GNU_addr_index. - void addOpAddress(DIELoc &Die, const MCSymbol *Label); + void addOpAddress(DIELoc &Die, const MCSymbol *Sym); /// Add a label delta attribute data and value. void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, @@ -201,14 +203,13 @@ public: void addDIETypeSignature(DIE &Die, uint64_t Signature); /// Add block data. - void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block); + void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc); /// Add block data. void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block); /// Add location information to specified debug information entry. - void addSourceLine(DIE &Die, unsigned Line, StringRef File, - StringRef Directory); + void addSourceLine(DIE &Die, unsigned Line, const DIFile *File); void addSourceLine(DIE &Die, const DILocalVariable *V); void addSourceLine(DIE &Die, const DIGlobalVariable *G); void addSourceLine(DIE &Die, const DISubprogram *SP); @@ -259,7 +260,7 @@ public: bool SkipSPAttributes = false); /// Find existing DIE or create new DIE for the given type. - DIE *getOrCreateTypeDIE(const MDNode *N); + DIE *getOrCreateTypeDIE(const MDNode *TyNode); /// Get context owner's DIE. 
DIE *getOrCreateContextDIE(const DIScope *Context); @@ -274,6 +275,10 @@ public: /// call insertDIE if MD is not null. DIE &createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N = nullptr); + bool useSegmentedStringOffsetsTable() const { + return DD->useSegmentedStringOffsetsTable(); + } + /// Compute the size of a header for this unit, not including the initial /// length field. virtual unsigned getHeaderSize() const { @@ -287,6 +292,12 @@ public: /// Emit the header for this unit, not including the initial length field. virtual void emitHeader(bool UseOffsets) = 0; + /// Add the DW_AT_str_offsets_base attribute to the unit DIE. + void addStringOffsetsStart(); + + /// Add the DW_AT_rnglists_base attribute to the unit DIE. + void addRnglistsBase(); + virtual DwarfCompileUnit &getCU() = 0; void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); @@ -300,15 +311,19 @@ public: const MCSymbol *Label, const MCSymbol *Sec); + /// If the \p File has an MD5 checksum, return it as an MD5Result + /// allocated in the MCContext. + MD5::MD5Result *getMD5AsBytes(const DIFile *File) const; + protected: ~DwarfUnit(); /// Create new static data member DIE. DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT); - /// Look up the source ID with the given directory and source file names. If - /// none currently exists, create a new ID and insert it in the line table. - virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0; + /// Look up the source ID for the given file. If none currently exists, + /// create a new ID and insert it in the line table. + virtual unsigned getOrCreateSourceID(const DIFile *File) = 0; /// Look in the DwarfDebug map for the MDNode that corresponds to the /// reference. @@ -327,11 +342,11 @@ protected: private: void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy); void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy); - void constructTypeDIE(DIE &Buffer, const DISubroutineType *DTy); + void constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy); void constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, DIE *IndexTy); void constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy); void constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy); - void constructMemberDIE(DIE &Buffer, const DIDerivedType *DT); + DIE &constructMemberDIE(DIE &Buffer, const DIDerivedType *DT); void constructTemplateTypeParameterDIE(DIE &Buffer, const DITemplateTypeParameter *TP); void constructTemplateValueParameterDIE(DIE &Buffer, @@ -357,8 +372,9 @@ class DwarfTypeUnit final : public DwarfUnit { const DIE *Ty; DwarfCompileUnit &CU; MCDwarfDwoLineTable *SplitLineTable; + bool UsedLineTable = false; - unsigned getOrCreateSourceID(StringRef File, StringRef Directory) override; + unsigned getOrCreateSourceID(const DIFile *File) override; bool isDwoUnit() const override; public: diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 3cdab57bca70..65de9d7e65a4 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -20,7 +20,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" @@ -30,6 +29,7 @@ #include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/Casting.h" #include 
"llvm/Support/LEB128.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -58,10 +58,10 @@ unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L, /// Compute the actions table and gather the first action index for each landing /// pad site. -unsigned EHStreamer:: -computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, - SmallVectorImpl<ActionEntry> &Actions, - SmallVectorImpl<unsigned> &FirstActions) { +void EHStreamer::computeActionsTable( + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + SmallVectorImpl<ActionEntry> &Actions, + SmallVectorImpl<unsigned> &FirstActions) { // The action table follows the call-site table in the LSDA. The individual // records are of two types: // @@ -149,7 +149,7 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, FirstAction = SizeActions + SizeSiteActions - SizeAction + 1; } // else identical - re-use previous FirstAction - // Information used when created the call-site table. The action record + // Information used when creating the call-site table. The action record // field of the call site record is the offset of the first associated // action record, relative to the start of the actions table. This value is // biased by 1 (1 indicating the start of the actions table), and 0 @@ -161,8 +161,6 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, PrevLPI = LPI; } - - return SizeActions; } /// Return `true' if this is a call to a function marked `nounwind'. Return @@ -361,55 +359,33 @@ void EHStreamer::emitExceptionTable() { LandingPads.push_back(&PadInfos[i]); // Order landing pads lexicographically by type id. - std::sort(LandingPads.begin(), LandingPads.end(), - [](const LandingPadInfo *L, - const LandingPadInfo *R) { return L->TypeIds < R->TypeIds; }); + llvm::sort(LandingPads.begin(), LandingPads.end(), + [](const LandingPadInfo *L, + const LandingPadInfo *R) { return L->TypeIds < R->TypeIds; }); // Compute the actions table and gather the first action index for each // landing pad site. SmallVector<ActionEntry, 32> Actions; SmallVector<unsigned, 64> FirstActions; - unsigned SizeActions = - computeActionsTable(LandingPads, Actions, FirstActions); + computeActionsTable(LandingPads, Actions, FirstActions); // Compute the call-site table. SmallVector<CallSiteEntry, 64> CallSites; computeCallSiteTable(CallSites, LandingPads, FirstActions); - // Final tallies. - - // Call sites. bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; - bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true; - - unsigned CallSiteTableLength; - if (IsSJLJ) - CallSiteTableLength = 0; - else { - unsigned SiteStartSize = 4; // dwarf::DW_EH_PE_udata4 - unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4 - unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4 - CallSiteTableLength = - CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize); - } - - for (unsigned i = 0, e = CallSites.size(); i < e; ++i) { - CallSiteTableLength += getULEB128Size(CallSites[i].Action); - if (IsSJLJ) - CallSiteTableLength += getULEB128Size(i); - } + unsigned CallSiteEncoding = + IsSJLJ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_uleb128; + bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty(); // Type infos. 
MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection(); unsigned TTypeEncoding; - unsigned TypeFormatSize; if (!HaveTTData) { - // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say - // that we're omitting that bit. + // If there is no TypeInfo, then we just explicitly say that we're omitting + // that bit. TTypeEncoding = dwarf::DW_EH_PE_omit; - // dwarf::DW_EH_PE_absptr - TypeFormatSize = Asm->getDataLayout().getPointerSize(); } else { // Okay, we have actual filters or typeinfos to emit. As such, we need to // pick a type encoding for them. We're about to emit a list of pointers to @@ -439,7 +415,6 @@ void EHStreamer::emitExceptionTable() { // in target-independent code. // TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding(); - TypeFormatSize = Asm->GetSizeOfEncodedValue(TTypeEncoding); } // Begin the exception table. @@ -460,64 +435,35 @@ void EHStreamer::emitExceptionTable() { Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); Asm->EmitEncodingByte(TTypeEncoding, "@TType"); - // The type infos need to be aligned. GCC does this by inserting padding just - // before the type infos. However, this changes the size of the exception - // table, so you need to take this into account when you output the exception - // table size. However, the size is output using a variable length encoding. - // So by increasing the size by inserting padding, you may increase the number - // of bytes used for writing the size. If it increases, say by one byte, then - // you now need to output one less byte of padding to get the type infos - // aligned. However this decreases the size of the exception table. This - // changes the value you have to output for the exception table size. Due to - // the variable length encoding, the number of bytes used for writing the - // length may decrease. If so, you then have to increase the amount of - // padding. And so on. If you look carefully at the GCC code you will see that - // it indeed does this in a loop, going on and on until the values stabilize. - // We chose another solution: don't output padding inside the table like GCC - // does, instead output it before the table. - unsigned SizeTypes = TypeInfos.size() * TypeFormatSize; - unsigned CallSiteTableLengthSize = getULEB128Size(CallSiteTableLength); - unsigned TTypeBaseOffset = - sizeof(int8_t) + // Call site format - CallSiteTableLengthSize + // Call site table length size - CallSiteTableLength + // Call site table length - SizeActions + // Actions size - SizeTypes; - unsigned TTypeBaseOffsetSize = getULEB128Size(TTypeBaseOffset); - unsigned TotalSize = - sizeof(int8_t) + // LPStart format - sizeof(int8_t) + // TType format - (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size - TTypeBaseOffset; // TType base offset - unsigned PadBytes = (4 - TotalSize) & 3; - + MCSymbol *TTBaseLabel = nullptr; if (HaveTTData) { - // Account for any extra padding that will be added to the call site table - // length. - Asm->EmitPaddedULEB128(TTypeBaseOffset, TTypeBaseOffsetSize + PadBytes, - "@TType base offset"); - PadBytes = 0; + // N.B.: There is a dependency loop between the size of the TTBase uleb128 + // here and the amount of padding before the aligned type table. The + // assembler must sometimes pad this uleb128 or insert extra padding before + // the type table. See PR35809 or GNU as bug 4029. 
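The PR35809 note above hinges on a basic property of ULEB128: the encoded size of a value depends on the value, so a table-length field can change the very offsets it is meant to describe, and only the assembler can resolve the resulting padding via label differences. A self-contained encoder makes the size behaviour explicit (illustrative only; LLVM's real implementation is encodeULEB128 in llvm/Support/LEB128.h):

    // Minimal ULEB128 encoder: 7 payload bits per byte, high bit set while
    // more bytes follow. Values 0..127 take one byte, 128..16383 take two.
    #include <cstdint>
    #include <vector>

    static std::vector<uint8_t> encodeULEB128Sketch(uint64_t Value) {
      std::vector<uint8_t> Bytes;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // continuation bit
        Bytes.push_back(Byte);
      } while (Value != 0);
      return Bytes;
    }

    // encodeULEB128Sketch(127).size() == 1 while encodeULEB128Sketch(128).size() == 2,
    // which is exactly the feedback the label-difference emission sidesteps.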
+ MCSymbol *TTBaseRefLabel = Asm->createTempSymbol("ttbaseref"); + TTBaseLabel = Asm->createTempSymbol("ttbase"); + Asm->EmitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel); + Asm->OutStreamer->EmitLabel(TTBaseRefLabel); } bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); + // Emit the landing pad call site table. + MCSymbol *CstBeginLabel = Asm->createTempSymbol("cst_begin"); + MCSymbol *CstEndLabel = Asm->createTempSymbol("cst_end"); + Asm->EmitEncodingByte(CallSiteEncoding, "Call site"); + Asm->EmitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel); + Asm->OutStreamer->EmitLabel(CstBeginLabel); + // SjLj Exception handling if (IsSJLJ) { - Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); - - // Add extra padding if it wasn't added to the TType base offset. - Asm->EmitPaddedULEB128(CallSiteTableLength, - CallSiteTableLengthSize + PadBytes, - "Call site table length"); - - // Emit the landing pad site information. unsigned idx = 0; for (SmallVectorImpl<CallSiteEntry>::const_iterator I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) { const CallSiteEntry &S = *I; - // Offset of the landing pad, counted in 16-byte bundles relative to the - // @LPStart address. + // Index of the call site entry. if (VerboseAsm) { Asm->OutStreamer->AddComment(">> Call Site " + Twine(idx) + " <<"); Asm->OutStreamer->AddComment(" On exception at call site "+Twine(idx)); @@ -557,14 +503,6 @@ void EHStreamer::emitExceptionTable() { // A missing entry in the call-site table indicates that a call is not // supposed to throw. - // Emit the landing pad call site table. - Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); - - // Add extra padding if it wasn't added to the TType base offset. - Asm->EmitPaddedULEB128(CallSiteTableLength, - CallSiteTableLengthSize + PadBytes, - "Call site table length"); - unsigned Entry = 0; for (SmallVectorImpl<CallSiteEntry>::const_iterator I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { @@ -579,29 +517,27 @@ void EHStreamer::emitExceptionTable() { if (!EndLabel) EndLabel = Asm->getFunctionEnd(); - // Offset of the call site relative to the previous call site, counted in - // number of 16-byte bundles. The first call site is counted relative to - // the start of the procedure fragment. + // Offset of the call site relative to the start of the procedure. if (VerboseAsm) Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<"); - Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4); + Asm->EmitLabelDifferenceAsULEB128(BeginLabel, EHFuncBeginSym); if (VerboseAsm) Asm->OutStreamer->AddComment(Twine(" Call between ") + BeginLabel->getName() + " and " + EndLabel->getName()); - Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); + Asm->EmitLabelDifferenceAsULEB128(EndLabel, BeginLabel); - // Offset of the landing pad, counted in 16-byte bundles relative to the - // @LPStart address. + // Offset of the landing pad relative to the start of the procedure. 
if (!S.LPad) { if (VerboseAsm) Asm->OutStreamer->AddComment(" has no landing pad"); - Asm->OutStreamer->EmitIntValue(0, 4/*size*/); + Asm->EmitULEB128(0); } else { if (VerboseAsm) Asm->OutStreamer->AddComment(Twine(" jumps to ") + S.LPad->LandingPadLabel->getName()); - Asm->EmitLabelDifference(S.LPad->LandingPadLabel, EHFuncBeginSym, 4); + Asm->EmitLabelDifferenceAsULEB128(S.LPad->LandingPadLabel, + EHFuncBeginSym); } // Offset of the first associated action record, relative to the start of @@ -617,6 +553,7 @@ void EHStreamer::emitExceptionTable() { Asm->EmitULEB128(S.Action); } } + Asm->OutStreamer->EmitLabel(CstEndLabel); // Emit the Action Table. int Entry = 0; @@ -660,12 +597,15 @@ void EHStreamer::emitExceptionTable() { Asm->EmitSLEB128(Action.NextAction); } - emitTypeInfos(TTypeEncoding); + if (HaveTTData) { + Asm->EmitAlignment(2); + emitTypeInfos(TTypeEncoding, TTBaseLabel); + } Asm->EmitAlignment(2); } -void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { +void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) { const MachineFunction *MF = Asm->MF; const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos(); const std::vector<unsigned> &FilterIds = MF->getFilterIds(); @@ -687,6 +627,8 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { Asm->EmitTTypeReference(GV, TTypeEncoding); } + Asm->OutStreamer->EmitLabel(TTBaseLabel); + // Emit the Exception Specifications. if (VerboseAsm && !FilterIds.empty()) { Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h index 7962b761d8de..b89421a1e067 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -72,9 +72,9 @@ protected: /// Compute the actions table and gather the first action index for each /// landing pad site. - unsigned computeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs, - SmallVectorImpl<ActionEntry> &Actions, - SmallVectorImpl<unsigned> &FirstActions); + void computeActionsTable(const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + SmallVectorImpl<ActionEntry> &Actions, + SmallVectorImpl<unsigned> &FirstActions); void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads, RangeMapType &PadMap); @@ -86,7 +86,7 @@ protected: /// no entry and must not be contained in the try-range of any entry - they /// form gaps in the table. Entries must be ordered by try-range address. void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, - const SmallVectorImpl<const LandingPadInfo *> &LPs, + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, const SmallVectorImpl<unsigned> &FirstActions); /// Emit landing pads and actions. @@ -110,9 +110,9 @@ protected: /// catches in the function. This tables is reversed indexed base 1. void emitExceptionTable(); - virtual void emitTypeInfos(unsigned TTypeEncoding); + virtual void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel); - // Helpers for for identifying what kind of clause an EH typeid or selector + // Helpers for identifying what kind of clause an EH typeid or selector // corresponds to. Negative selectors are for filter clauses, the zero // selector is for cleanups, and positive selectors are for catch clauses. 
static bool isFilterEHSelector(int Selector) { return Selector < 0; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index e459c02c9a6e..49cc376fcc98 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCs.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" @@ -27,6 +26,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; @@ -77,7 +77,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, // Emit PointCount. OS.AddComment("safe point count"); - AP.EmitInt16(MD.size()); + AP.emitInt16(MD.size()); // And each safe point... for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE; @@ -94,7 +94,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, // Emit the stack frame size. OS.AddComment("stack frame size (in words)"); - AP.EmitInt16(MD.getFrameSize() / IntPtrSize); + AP.emitInt16(MD.getFrameSize() / IntPtrSize); // Emit stack arity, i.e. the number of stacked arguments. unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6; @@ -102,11 +102,11 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, ? MD.getFunction().arg_size() - RegisteredArgs : 0; OS.AddComment("stack arity"); - AP.EmitInt16(StackArity); + AP.emitInt16(StackArity); // Emit the number of live roots in the function. OS.AddComment("live root count"); - AP.EmitInt16(MD.live_size(PI)); + AP.emitInt16(MD.live_size(PI)); // And for each live root... for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI), @@ -114,7 +114,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, LI != LE; ++LI) { // Emit live root's offset within the stack frame. OS.AddComment("stack index (offset / wordsize)"); - AP.EmitInt16(LI->StackOffset / IntPtrSize); + AP.emitInt16(LI->StackOffset / IntPtrSize); } } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index e0cc241dd23f..59a57ed30d10 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -18,7 +18,6 @@ #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/GCs.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/Mangler.h" @@ -27,6 +26,7 @@ #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include <cctype> #include <cstddef> #include <cstdint> @@ -129,7 +129,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, // Very rude! report_fatal_error(" Too much descriptor for ocaml GC"); } - AP.EmitInt16(NumDescriptors); + AP.emitInt16(NumDescriptors); AP.EmitAlignment(IntPtrSize == 4 ? 
2 : 3); for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), @@ -166,8 +166,8 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, } AP.OutStreamer->EmitSymbolValue(J->Label, IntPtrSize); - AP.EmitInt16(FrameSize); - AP.EmitInt16(LiveCount); + AP.emitInt16(FrameSize); + AP.emitInt16(LiveCount); for (GCFunctionInfo::live_iterator K = FI.live_begin(J), KE = FI.live_end(J); @@ -178,7 +178,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, "GC root stack offset is outside of fixed stack frame and out " "of range for ocaml GC!"); } - AP.EmitInt16(K->StackOffset); + AP.emitInt16(K->StackOffset); } AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp new file mode 100644 index 000000000000..18d37caf57ee --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp @@ -0,0 +1,45 @@ +//===-- CodeGen/AsmPrinter/WinCFGuard.cpp - Control Flow Guard Impl ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing Win64 exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "WinCFGuard.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Metadata.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCStreamer.h" + +#include <vector> + +using namespace llvm; + +WinCFGuard::WinCFGuard(AsmPrinter *A) : AsmPrinterHandler(), Asm(A) {} + +WinCFGuard::~WinCFGuard() {} + +void WinCFGuard::endModule() { + const Module *M = Asm->MMI->getModule(); + std::vector<const Function *> Functions; + for (const Function &F : *M) + if (F.hasAddressTaken()) + Functions.push_back(&F); + if (Functions.empty()) + return; + auto &OS = *Asm->OutStreamer; + OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection()); + for (const Function *F : Functions) + OS.EmitCOFFSymbolIndex(Asm->getSymbol(F)); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h new file mode 100644 index 000000000000..124e8f04bfad --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h @@ -0,0 +1,54 @@ +//===-- WinCFGuard.h - Windows Control Flow Guard Handling ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing windows exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H + +#include "AsmPrinterHandler.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + +class LLVM_LIBRARY_VISIBILITY WinCFGuard : public AsmPrinterHandler { + /// Target of directive emission. 
+ AsmPrinter *Asm; + +public: + WinCFGuard(AsmPrinter *A); + ~WinCFGuard() override; + + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} + + /// Emit the Control Flow Guard function ID table + void endModule() override; + + /// Gather pre-function debug information. + /// Every beginFunction(MF) call should be followed by an endFunction(MF) + /// call. + void beginFunction(const MachineFunction *MF) override {} + + /// Gather post-function debug information. + /// Please note that some AsmPrinter implementations may not call + /// beginFunction at all. + void endFunction(const MachineFunction *MF) override {} + + /// Process beginning of an instruction. + void beginInstruction(const MachineInstr *MI) override {} + + /// Process end of an instruction. + void endInstruction() override {} +}; + +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index a6a8e84a949f..eff73a58d8d2 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -21,7 +21,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetLowering.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" @@ -35,6 +34,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h index 371061c2c2ec..eed3c4453ffc 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h @@ -100,7 +100,7 @@ public: /// Gather and emit post-function exception information. void endFunction(const MachineFunction *) override; - /// \brief Emit target-specific EH funclet machinery. + /// Emit target-specific EH funclet machinery. 
void beginFunclet(const MachineBasicBlock &MBB, MCSymbol *Sym) override; void endFunclet() override; }; diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp index 7042bc997223..f2615edaece2 100644 --- a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -98,8 +98,8 @@ namespace { CreateCmpXchgInstFun CreateCmpXchg); bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); - bool isIdempotentRMW(AtomicRMWInst *AI); - bool simplifyIdempotentRMW(AtomicRMWInst *AI); + bool isIdempotentRMW(AtomicRMWInst *RMWI); + bool simplifyIdempotentRMW(AtomicRMWInst *RMWI); bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand, Value *ValueOperand, @@ -379,8 +379,8 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { NewLI->setAlignment(LI->getAlignment()); NewLI->setVolatile(LI->isVolatile()); NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); - DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); - + LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); + Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType()); LI->replaceAllUsesWith(NewVal); LI->eraseFromParent(); @@ -462,7 +462,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { NewSI->setAlignment(SI->getAlignment()); NewSI->setVolatile(SI->isVolatile()); NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); - DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); + LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); SI->eraseFromParent(); return NewSI; } @@ -943,7 +943,7 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst * CI->getSyncScopeID()); NewCI->setVolatile(CI->isVolatile()); NewCI->setWeak(CI->isWeak()); - DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n"); + LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n"); Value *OldVal = Builder.CreateExtractValue(NewCI, 0); Value *Succ = Builder.CreateExtractValue(NewCI, 1); diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index 7f358a679366..c7a0c6457164 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -152,7 +152,7 @@ BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { assert(MBB->pred_empty() && "MBB must be dead!"); - DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); + LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); MachineFunction *MF = MBB->getParent(); // drop all successors. @@ -164,7 +164,7 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { // Remove the block. MF->erase(MBB); - FuncletMembership.erase(MBB); + EHScopeMembership.erase(MBB); if (MLI) MLI->removeBlock(MBB); } @@ -199,8 +199,8 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); } - // Recalculate funclet membership. - FuncletMembership = getFuncletMembership(MF); + // Recalculate EH scope membership. + EHScopeMembership = getEHScopeMembership(MF); bool MadeChangeThisIteration = true; while (MadeChangeThisIteration) { @@ -296,6 +296,11 @@ static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) { return HashMachineInstr(*I); } +/// Whether MI should be counted as an instruction when calculating common tail. 
+static bool countsAsInstruction(const MachineInstr &MI) { + return !(MI.isDebugValue() || MI.isCFIInstruction()); +} + /// ComputeCommonTailLength - Given two machine basic blocks, compute the number /// of instructions they actually have in common together at their end. Return /// iterators for the first shared instruction in each block. @@ -310,26 +315,27 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, while (I1 != MBB1->begin() && I2 != MBB2->begin()) { --I1; --I2; // Skip debugging pseudos; necessary to avoid changing the code. - while (I1->isDebugValue()) { + while (!countsAsInstruction(*I1)) { if (I1==MBB1->begin()) { - while (I2->isDebugValue()) { - if (I2==MBB2->begin()) + while (!countsAsInstruction(*I2)) { + if (I2==MBB2->begin()) { // I1==DBG at begin; I2==DBG at begin - return TailLen; + goto SkipTopCFIAndReturn; + } --I2; } ++I2; // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin - return TailLen; + goto SkipTopCFIAndReturn; } --I1; } // I1==first (untested) non-DBG preceding known match - while (I2->isDebugValue()) { + while (!countsAsInstruction(*I2)) { if (I2==MBB2->begin()) { ++I1; // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin - return TailLen; + goto SkipTopCFIAndReturn; } --I2; } @@ -352,7 +358,7 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, // I1==MBB1->begin() work as expected.) if (I1 == MBB1->begin() && I2 != MBB2->begin()) { --I2; - while (I2->isDebugValue()) { + while (I2->isDebugInstr()) { if (I2 == MBB2->begin()) return TailLen; --I2; @@ -361,13 +367,44 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, } if (I2 == MBB2->begin() && I1 != MBB1->begin()) { --I1; - while (I1->isDebugValue()) { + while (I1->isDebugInstr()) { if (I1 == MBB1->begin()) return TailLen; --I1; } ++I1; } + +SkipTopCFIAndReturn: + // Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if + // I1 and I2 are non-identical when compared and then one or both of them ends + // up pointing to a CFI instruction after being incremented. For example: + /* + BB1: + ... + INSTRUCTION_A + ADD32ri8 <- last common instruction + ... + BB2: + ... + INSTRUCTION_B + CFI_INSTRUCTION + ADD32ri8 <- last common instruction + ... + */ + // When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after + // incrementing the iterators, I1 will point to ADD, however I2 will point to + // the CFI instruction. Later on, this leads to BB2 being 'hacked off' at the + // wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI + // instruction. + while (I1 != MBB1->end() && I1->isCFIInstruction()) { + ++I1; + } + + while (I2 != MBB2->end() && I2->isCFIInstruction()) { + ++I2; + } + return TailLen; } @@ -438,11 +475,11 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, if (UpdateLiveIns) computeAndAddLiveIns(LiveRegs, *NewMBB); - // Add the new block to the funclet. - const auto &FuncletI = FuncletMembership.find(&CurMBB); - if (FuncletI != FuncletMembership.end()) { - auto n = FuncletI->second; - FuncletMembership[NewMBB] = n; + // Add the new block to the EH scope. 
+ const auto &EHScopeI = EHScopeMembership.find(&CurMBB); + if (EHScopeI != EHScopeMembership.end()) { + auto n = EHScopeI->second; + EHScopeMembership[NewMBB] = n; } return NewMBB; @@ -454,7 +491,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) { unsigned Time = 0; for (; I != E; ++I) { - if (I->isDebugValue()) + if (!countsAsInstruction(*I)) continue; if (I->isCall()) Time += 10; @@ -589,7 +626,7 @@ static bool blockEndsInUnreachable(const MachineBasicBlock *MBB) { /// SuccBB A common successor of MBB1, MBB2 which are in a canonical form /// relative to SuccBB /// PredBB The layout predecessor of SuccBB, if any. -/// FuncletMembership map from block to funclet #. +/// EHScopeMembership map from block to EH scope #. /// AfterPlacement True if we are merging blocks after layout. Stricter /// thresholds apply to prevent undoing tail-duplication. static bool @@ -598,24 +635,24 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, MachineBasicBlock::iterator &I1, MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB, - DenseMap<const MachineBasicBlock *, int> &FuncletMembership, + DenseMap<const MachineBasicBlock *, int> &EHScopeMembership, bool AfterPlacement) { - // It is never profitable to tail-merge blocks from two different funclets. - if (!FuncletMembership.empty()) { - auto Funclet1 = FuncletMembership.find(MBB1); - assert(Funclet1 != FuncletMembership.end()); - auto Funclet2 = FuncletMembership.find(MBB2); - assert(Funclet2 != FuncletMembership.end()); - if (Funclet1->second != Funclet2->second) + // It is never profitable to tail-merge blocks from two different EH scopes. + if (!EHScopeMembership.empty()) { + auto EHScope1 = EHScopeMembership.find(MBB1); + assert(EHScope1 != EHScopeMembership.end()); + auto EHScope2 = EHScopeMembership.find(MBB2); + assert(EHScope2 != EHScopeMembership.end()); + if (EHScope1->second != EHScope2->second) return false; } CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2); if (CommonTailLen == 0) return false; - DEBUG(dbgs() << "Common tail length of " << printMBBReference(*MBB1) - << " and " << printMBBReference(*MBB2) << " is " << CommonTailLen - << '\n'); + LLVM_DEBUG(dbgs() << "Common tail length of " << printMBBReference(*MBB1) + << " and " << printMBBReference(*MBB2) << " is " + << CommonTailLen << '\n'); // It's almost always profitable to merge any number of non-terminator // instructions with the block that falls through into the common successor. @@ -706,7 +743,7 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash, MinCommonTailLength, CommonTailLen, TrialBBI1, TrialBBI2, SuccBB, PredBB, - FuncletMembership, + EHScopeMembership, AfterBlockPlacement)) { if (CommonTailLen > maxCommonTailLength) { SameTails.clear(); @@ -770,8 +807,8 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, SameTails[commonTailIndex].getTailStartPos(); MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); - DEBUG(dbgs() << "\nSplitting " << printMBBReference(*MBB) << ", size " - << maxCommonTailLength); + LLVM_DEBUG(dbgs() << "\nSplitting " << printMBBReference(*MBB) << ", size " + << maxCommonTailLength); // If the split block unconditionally falls-thru to SuccBB, it will be // merged. In control flow terms it should then take SuccBB's name. e.g. 
If @@ -780,7 +817,7 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, SuccBB->getBasicBlock() : MBB->getBasicBlock(); MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI, BB); if (!newMBB) { - DEBUG(dbgs() << "... failed!"); + LLVM_DEBUG(dbgs() << "... failed!"); return false; } @@ -814,12 +851,12 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos, assert(MBBI != MBBIE && "Reached BB end within common tail length!"); (void)MBBIE; - if (MBBI->isDebugValue()) { + if (!countsAsInstruction(*MBBI)) { ++MBBI; continue; } - while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue()) + while ((MBBICommon != MBBIECommon) && !countsAsInstruction(*MBBICommon)) ++MBBICommon; assert(MBBICommon != MBBIECommon && @@ -859,7 +896,7 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) { } for (auto &MI : *MBB) { - if (MI.isDebugValue()) + if (!countsAsInstruction(MI)) continue; DebugLoc DL = MI.getDebugLoc(); for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) { @@ -869,7 +906,7 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) { auto &Pos = NextCommonInsts[i]; assert(Pos != SameTails[i].getBlock()->end() && "Reached BB end within common tail"); - while (Pos->isDebugValue()) { + while (!countsAsInstruction(*Pos)) { ++Pos; assert(Pos != SameTails[i].getBlock()->end() && "Reached BB end within common tail"); @@ -884,11 +921,12 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) { if (UpdateLiveIns) { LivePhysRegs NewLiveIns(*TRI); computeLiveIns(NewLiveIns, *MBB); + LiveRegs.init(*TRI); // The flag merging may lead to some register uses no longer using the // <undef> flag, add IMPLICIT_DEFs in the predecessors as necessary. for (MachineBasicBlock *Pred : MBB->predecessors()) { - LiveRegs.init(*TRI); + LiveRegs.clear(); LiveRegs.addLiveOuts(*Pred); MachineBasicBlock::iterator InsertBefore = Pred->getFirstTerminator(); for (unsigned Reg : NewLiveIns) { @@ -919,18 +957,19 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, unsigned MinCommonTailLength) { bool MadeChange = false; - DEBUG(dbgs() << "\nTryTailMergeBlocks: "; - for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) dbgs() - << printMBBReference(*MergePotentials[i].getBlock()) - << (i == e - 1 ? "" : ", "); - dbgs() << "\n"; if (SuccBB) { - dbgs() << " with successor " << printMBBReference(*SuccBB) << '\n'; - if (PredBB) - dbgs() << " which has fall-through from " - << printMBBReference(*PredBB) << "\n"; - } dbgs() << "Looking for common tails of at least " - << MinCommonTailLength << " instruction" - << (MinCommonTailLength == 1 ? "" : "s") << '\n';); + LLVM_DEBUG( + dbgs() << "\nTryTailMergeBlocks: "; + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) dbgs() + << printMBBReference(*MergePotentials[i].getBlock()) + << (i == e - 1 ? "" : ", "); + dbgs() << "\n"; if (SuccBB) { + dbgs() << " with successor " << printMBBReference(*SuccBB) << '\n'; + if (PredBB) + dbgs() << " which has fall-through from " + << printMBBReference(*PredBB) << "\n"; + } dbgs() << "Looking for common tails of at least " + << MinCommonTailLength << " instruction" + << (MinCommonTailLength == 1 ? "" : "s") << '\n';); // Sort by hash value so that blocks with identical end sequences sort // together. @@ -1010,19 +1049,19 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // MBB is common tail. Adjust all other BB's to jump to this one. // Traversal must be forwards so erases work. 
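  // [Editorial sketch, not part of the upstream diff] Roughly what the loop
  // below does, assuming bb.1 and bb.2 end in the same two instructions:
  //
  //   Before:                              After:
  //     bb.1: ... ; INSTR_A; INSTR_B; ret    bb.1: ... ; branch to bb.tail
  //     bb.2: ... ; INSTR_A; INSTR_B; ret    bb.2: ... ; branch to bb.tail
  //                                          bb.tail: INSTR_A; INSTR_B; ret
  //
  // The common tail ends up in a single block (split off if necessary), and the
  // remaining predecessors have their tails removed and replaced by a branch to
  // it (replaceTailWithBranchTo below).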
- DEBUG(dbgs() << "\nUsing common tail in " << printMBBReference(*MBB) - << " for "); + LLVM_DEBUG(dbgs() << "\nUsing common tail in " << printMBBReference(*MBB) + << " for "); for (unsigned int i=0, e = SameTails.size(); i != e; ++i) { if (commonTailIndex == i) continue; - DEBUG(dbgs() << printMBBReference(*SameTails[i].getBlock()) - << (i == e - 1 ? "" : ", ")); + LLVM_DEBUG(dbgs() << printMBBReference(*SameTails[i].getBlock()) + << (i == e - 1 ? "" : ", ")); // Hack the end off BB i, making it jump to BB commonTailIndex instead. replaceTailWithBranchTo(SameTails[i].getTailStartPos(), *MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. MergePotentials.erase(SameTails[i].getMPIter()); } - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "\n"); // We leave commonTailIndex in the worklist in case there are other blocks // that match it with a smaller number of instructions. MadeChange = true; @@ -1254,8 +1293,8 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { // Make sure blocks are numbered in order MF.RenumberBlocks(); - // Renumbering blocks alters funclet membership, recalculate it. - FuncletMembership = getFuncletMembership(MF); + // Renumbering blocks alters EH scope membership, recalculate it. + EHScopeMembership = getEHScopeMembership(MF); for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); I != E; ) { @@ -1319,6 +1358,53 @@ static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) { return DebugLoc(); } +static void copyDebugInfoToPredecessor(const TargetInstrInfo *TII, + MachineBasicBlock &MBB, + MachineBasicBlock &PredMBB) { + auto InsertBefore = PredMBB.getFirstTerminator(); + for (MachineInstr &MI : MBB.instrs()) + if (MI.isDebugValue()) { + TII->duplicate(PredMBB, InsertBefore, MI); + LLVM_DEBUG(dbgs() << "Copied debug value from empty block to pred: " + << MI); + } +} + +static void copyDebugInfoToSuccessor(const TargetInstrInfo *TII, + MachineBasicBlock &MBB, + MachineBasicBlock &SuccMBB) { + auto InsertBefore = SuccMBB.SkipPHIsAndLabels(SuccMBB.begin()); + for (MachineInstr &MI : MBB.instrs()) + if (MI.isDebugValue()) { + TII->duplicate(SuccMBB, InsertBefore, MI); + LLVM_DEBUG(dbgs() << "Copied debug value from empty block to succ: " + << MI); + } +} + +// Try to salvage DBG_VALUE instructions from an otherwise empty block. If such +// a basic block is removed we would lose the debug information unless we have +// copied the information to a predecessor/successor. +// +// TODO: This function only handles some simple cases. An alternative would be +// to run a heavier analysis, such as the LiveDebugValues pass, before we do +// branch folding. +static void salvageDebugInfoFromEmptyBlock(const TargetInstrInfo *TII, + MachineBasicBlock &MBB) { + assert(IsEmptyBlock(&MBB) && "Expected an empty block (except debug info)."); + // If this MBB is the only predecessor of a successor it is legal to copy + // DBG_VALUE instructions to the beginning of the successor. + for (MachineBasicBlock *SuccBB : MBB.successors()) + if (SuccBB->pred_size() == 1) + copyDebugInfoToSuccessor(TII, MBB, *SuccBB); + // If this MBB is the only successor of a predecessor it is legal to copy the + // DBG_VALUE instructions to the end of the predecessor (just before the + // terminators, assuming that the terminator isn't affecting the DBG_VALUE). 
+ for (MachineBasicBlock *PredBB : MBB.predecessors()) + if (PredBB->succ_size() == 1) + copyDebugInfoToPredecessor(TII, MBB, *PredBB); +} + bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { bool MadeChange = false; MachineFunction &MF = *MBB->getParent(); @@ -1327,14 +1413,14 @@ ReoptimizeBlock: MachineFunction::iterator FallThrough = MBB->getIterator(); ++FallThrough; - // Make sure MBB and FallThrough belong to the same funclet. - bool SameFunclet = true; - if (!FuncletMembership.empty() && FallThrough != MF.end()) { - auto MBBFunclet = FuncletMembership.find(MBB); - assert(MBBFunclet != FuncletMembership.end()); - auto FallThroughFunclet = FuncletMembership.find(&*FallThrough); - assert(FallThroughFunclet != FuncletMembership.end()); - SameFunclet = MBBFunclet->second == FallThroughFunclet->second; + // Make sure MBB and FallThrough belong to the same EH scope. + bool SameEHScope = true; + if (!EHScopeMembership.empty() && FallThrough != MF.end()) { + auto MBBEHScope = EHScopeMembership.find(MBB); + assert(MBBEHScope != EHScopeMembership.end()); + auto FallThroughEHScope = EHScopeMembership.find(&*FallThrough); + assert(FallThroughEHScope != EHScopeMembership.end()); + SameEHScope = MBBEHScope->second == FallThroughEHScope->second; } // If this block is empty, make everyone use its fall-through, not the block @@ -1342,7 +1428,8 @@ ReoptimizeBlock: // points to this block. Blocks with their addresses taken shouldn't be // optimized away. if (IsEmptyBlock(MBB) && !MBB->isEHPad() && !MBB->hasAddressTaken() && - SameFunclet) { + SameEHScope) { + salvageDebugInfoFromEmptyBlock(TII, *MBB); // Dead block? Leave for cleanup later. if (MBB->pred_empty()) return MadeChange; @@ -1406,8 +1493,8 @@ ReoptimizeBlock: if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 && PrevBB.succ_size() == 1 && !MBB->hasAddressTaken() && !MBB->isEHPad()) { - DEBUG(dbgs() << "\nMerging into block: " << PrevBB - << "From MBB: " << *MBB); + LLVM_DEBUG(dbgs() << "\nMerging into block: " << PrevBB + << "From MBB: " << *MBB); // Remove redundant DBG_VALUEs first. if (PrevBB.begin() != PrevBB.end()) { MachineBasicBlock::iterator PrevBBIter = PrevBB.end(); @@ -1416,7 +1503,7 @@ ReoptimizeBlock: // Check if DBG_VALUE at the end of PrevBB is identical to the // DBG_VALUE at the beginning of MBB. while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end() - && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) { + && PrevBBIter->isDebugInstr() && MBBIter->isDebugInstr()) { if (!MBBIter->isIdenticalTo(*PrevBBIter)) break; MachineInstr &DuplicateDbg = *MBBIter; @@ -1493,8 +1580,8 @@ ReoptimizeBlock: // Reverse the branch so we will fall through on the previous true cond. SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); if (!TII->reverseBranchCondition(NewPriorCond)) { - DEBUG(dbgs() << "\nMoving MBB: " << *MBB - << "To make fallthrough to: " << *PriorTBB << "\n"); + LLVM_DEBUG(dbgs() << "\nMoving MBB: " << *MBB + << "To make fallthrough to: " << *PriorTBB << "\n"); DebugLoc dl = getBranchDebugLoc(PrevBB); TII->removeBranch(PrevBB); @@ -1829,8 +1916,12 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, if (Uses.empty()) return Loc; + // If the terminator is the only instruction in the block and Uses is not + // empty (or we would have returned above), we can still safely hoist + // instructions just before the terminator as long as the Defs/Uses are not + // violated (which is checked in HoistCommonCodeInSuccs). 
if (Loc == MBB->begin()) - return MBB->end(); + return Loc; // The terminator is probably a conditional branch, try not to separate the // branch from condition setting instruction. @@ -1917,7 +2008,6 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { return false; bool HasDups = false; - SmallVector<unsigned, 4> LocalDefs, LocalKills; SmallSet<unsigned, 4> ActiveDefsSet, AllDefsSet; MachineBasicBlock::iterator TIB = TBB->begin(); MachineBasicBlock::iterator FIB = FBB->begin(); @@ -2000,7 +2090,6 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!Reg) continue; if (!AllDefsSet.count(Reg)) { - LocalKills.push_back(Reg); continue; } if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -2018,7 +2107,6 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { unsigned Reg = MO.getReg(); if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg)) continue; - LocalDefs.push_back(Reg); addRegAndItsAliases(Reg, TRI, ActiveDefsSet); addRegAndItsAliases(Reg, TRI, AllDefsSet); } @@ -2034,25 +2122,9 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { MBB->splice(Loc, TBB, TBB->begin(), TIB); FBB->erase(FBB->begin(), FIB); - // Update livein's. - bool ChangedLiveIns = false; - for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { - unsigned Def = LocalDefs[i]; - if (ActiveDefsSet.count(Def)) { - TBB->addLiveIn(Def); - FBB->addLiveIn(Def); - ChangedLiveIns = true; - } - } - for (unsigned K : LocalKills) { - TBB->removeLiveIn(K); - FBB->removeLiveIn(K); - ChangedLiveIns = true; - } - - if (ChangedLiveIns) { - TBB->sortUniqueLiveIns(); - FBB->sortUniqueLiveIns(); + if (UpdateLiveIns) { + recomputeLiveIns(*TBB); + recomputeLiveIns(*FBB); } ++NumHoist; diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h index 0f0952550137..accd0ab7317b 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.h +++ b/contrib/llvm/lib/CodeGen/BranchFolding.h @@ -38,11 +38,11 @@ class TargetRegisterInfo; explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, - MBFIWrapper &MBFI, - const MachineBranchProbabilityInfo &MBPI, + MBFIWrapper &FreqInfo, + const MachineBranchProbabilityInfo &ProbInfo, // Min tail length to merge. Defaults to commandline // flag. Ignored for optsize. - unsigned MinCommonTailLength = 0); + unsigned MinTailLength = 0); /// Perhaps branch folding, tail merging and other CFG optimizations on the /// given function. Block placement changes the layout and may create new @@ -75,7 +75,7 @@ class TargetRegisterInfo; std::vector<MergePotentialsElt> MergePotentials; SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging; - DenseMap<const MachineBasicBlock *, int> FuncletMembership; + DenseMap<const MachineBasicBlock *, int> EHScopeMembership; class SameTailElt { MPIterator MPIter; @@ -132,7 +132,7 @@ class TargetRegisterInfo; LivePhysRegs LiveRegs; public: - /// \brief This class keeps track of branch frequencies of newly created + /// This class keeps track of branch frequencies of newly created /// blocks and tail-merged blocks. 
class MBFIWrapper { public: diff --git a/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp index 0d87f142c7cc..c092da2b6602 100644 --- a/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" @@ -95,7 +96,7 @@ class BranchRelaxation : public MachineFunctionPass { MachineBasicBlock *splitBlockBeforeInstr(MachineInstr &MI, MachineBasicBlock *DestBB); - void adjustBlockOffsets(MachineBasicBlock &MBB); + void adjustBlockOffsets(MachineBasicBlock &Start); bool isBlockInRange(const MachineInstr &MI, const MachineBasicBlock &BB) const; bool fixupConditionalBranch(MachineInstr &MI); @@ -287,10 +288,11 @@ bool BranchRelaxation::isBlockInRange( if (TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - BrOffset)) return true; - DEBUG(dbgs() << "Out of range branch to destination " - << printMBBReference(DestBB) << " from " - << printMBBReference(*MI.getParent()) << " to " << DestOffset - << " offset " << DestOffset - BrOffset << '\t' << MI); + LLVM_DEBUG(dbgs() << "Out of range branch to destination " + << printMBBReference(DestBB) << " from " + << printMBBReference(*MI.getParent()) << " to " + << DestOffset << " offset " << DestOffset - BrOffset << '\t' + << MI); return false; } @@ -302,8 +304,41 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) { DebugLoc DL = MI.getDebugLoc(); MachineBasicBlock *MBB = MI.getParent(); MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + MachineBasicBlock *NewBB = nullptr; SmallVector<MachineOperand, 4> Cond; + auto insertUncondBranch = [&](MachineBasicBlock *MBB, + MachineBasicBlock *DestBB) { + unsigned &BBSize = BlockInfo[MBB->getNumber()].Size; + int NewBrSize = 0; + TII->insertUnconditionalBranch(*MBB, DestBB, DL, &NewBrSize); + BBSize += NewBrSize; + }; + auto insertBranch = [&](MachineBasicBlock *MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + SmallVectorImpl<MachineOperand>& Cond) { + unsigned &BBSize = BlockInfo[MBB->getNumber()].Size; + int NewBrSize = 0; + TII->insertBranch(*MBB, TBB, FBB, Cond, DL, &NewBrSize); + BBSize += NewBrSize; + }; + auto removeBranch = [&](MachineBasicBlock *MBB) { + unsigned &BBSize = BlockInfo[MBB->getNumber()].Size; + int RemovedSize = 0; + TII->removeBranch(*MBB, &RemovedSize); + BBSize -= RemovedSize; + }; + + auto finalizeBlockChanges = [&](MachineBasicBlock *MBB, + MachineBasicBlock *NewBB) { + // Keep the block offsets up to date. + adjustBlockOffsets(*MBB); + + // Need to fix live-in lists if we track liveness. + if (NewBB && TRI->trackLivenessAfterRegAlloc(*MF)) + computeAndAddLiveIns(LiveRegs, *NewBB); + }; + bool Fail = TII->analyzeBranch(*MBB, TBB, FBB, Cond); assert(!Fail && "branches to be relaxed must be analyzable"); (void)Fail; @@ -316,71 +351,90 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) { // b L1 // L2: - if (FBB && isBlockInRange(MI, *FBB)) { - // Last MI in the BB is an unconditional branch. 
We can simply invert the - // condition and swap destinations: - // beq L1 - // b L2 - // => - // bne L2 - // b L1 - DEBUG(dbgs() << " Invert condition and swap " - "its destination with " << MBB->back()); - - TII->reverseBranchCondition(Cond); - int OldSize = 0, NewSize = 0; - TII->removeBranch(*MBB, &OldSize); - TII->insertBranch(*MBB, FBB, TBB, Cond, DL, &NewSize); - - BlockInfo[MBB->getNumber()].Size += (NewSize - OldSize); - return true; - } else if (FBB) { - // We need to split the basic block here to obtain two long-range - // unconditional branches. - auto &NewBB = *MF->CreateMachineBasicBlock(MBB->getBasicBlock()); - MF->insert(++MBB->getIterator(), &NewBB); - - // Insert an entry into BlockInfo to align it properly with the block - // numbers. - BlockInfo.insert(BlockInfo.begin() + NewBB.getNumber(), BasicBlockInfo()); - - unsigned &NewBBSize = BlockInfo[NewBB.getNumber()].Size; - int NewBrSize; - TII->insertUnconditionalBranch(NewBB, FBB, DL, &NewBrSize); - NewBBSize += NewBrSize; - - // Update the successor lists according to the transformation to follow. - // Do it here since if there's no split, no update is needed. - MBB->replaceSuccessor(FBB, &NewBB); - NewBB.addSuccessor(FBB); + bool ReversedCond = !TII->reverseBranchCondition(Cond); + if (ReversedCond) { + if (FBB && isBlockInRange(MI, *FBB)) { + // Last MI in the BB is an unconditional branch. We can simply invert the + // condition and swap destinations: + // beq L1 + // b L2 + // => + // bne L2 + // b L1 + LLVM_DEBUG(dbgs() << " Invert condition and swap " + "its destination with " + << MBB->back()); + + removeBranch(MBB); + insertBranch(MBB, FBB, TBB, Cond); + finalizeBlockChanges(MBB, nullptr); + return true; + } + if (FBB) { + // We need to split the basic block here to obtain two long-range + // unconditional branches. + NewBB = createNewBlockAfter(*MBB); + + insertUncondBranch(NewBB, FBB); + // Update the succesor lists according to the transformation to follow. + // Do it here since if there's no split, no update is needed. + MBB->replaceSuccessor(FBB, NewBB); + NewBB->addSuccessor(FBB); + } - // Need to fix live-in lists if we track liveness. - if (TRI->trackLivenessAfterRegAlloc(*MF)) - computeAndAddLiveIns(LiveRegs, NewBB); + // We now have an appropriate fall-through block in place (either naturally or + // just created), so we can use the inverted the condition. + MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB)); + + LLVM_DEBUG(dbgs() << " Insert B to " << printMBBReference(*TBB) + << ", invert condition and change dest. to " + << printMBBReference(NextBB) << '\n'); + + removeBranch(MBB); + // Insert a new conditional branch and a new unconditional branch. + insertBranch(MBB, &NextBB, TBB, Cond); + + finalizeBlockChanges(MBB, NewBB); + return true; } + // Branch cond can't be inverted. + // In this case we always add a block after the MBB. + LLVM_DEBUG(dbgs() << " The branch condition can't be inverted. " + << " Insert a new BB after " << MBB->back()); - // We now have an appropriate fall-through block in place (either naturally or - // just created), so we can invert the condition. - MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB)); + if (!FBB) + FBB = &(*std::next(MachineFunction::iterator(MBB))); - DEBUG(dbgs() << " Insert B to " << printMBBReference(*TBB) - << ", invert condition and change dest. to " - << printMBBReference(NextBB) << '\n'); + // This is the block with cond. branch and the distance to TBB is too long. 
+ // beq L1 + // L2: - unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size; + // We do the following transformation: + // beq NewBB + // b L2 + // NewBB: + // b L1 + // L2: - // Insert a new conditional branch and a new unconditional branch. - int RemovedSize = 0; - TII->reverseBranchCondition(Cond); - TII->removeBranch(*MBB, &RemovedSize); - MBBSize -= RemovedSize; + NewBB = createNewBlockAfter(*MBB); + insertUncondBranch(NewBB, TBB); - int AddedSize = 0; - TII->insertBranch(*MBB, &NextBB, TBB, Cond, DL, &AddedSize); - MBBSize += AddedSize; + LLVM_DEBUG(dbgs() << " Insert cond B to the new BB " + << printMBBReference(*NewBB) + << " Keep the exiting condition.\n" + << " Insert B to " << printMBBReference(*FBB) << ".\n" + << " In the new BB: Insert B to " + << printMBBReference(*TBB) << ".\n"); - // Finally, keep the block offsets up to date. - adjustBlockOffsets(*MBB); + // Update the successor lists according to the transformation to follow. + MBB->replaceSuccessor(TBB, NewBB); + NewBB->addSuccessor(TBB); + + // Replace branch in the current (MBB) block. + removeBranch(MBB); + insertBranch(MBB, NewBB, FBB, Cond); + + finalizeBlockChanges(MBB, NewBB); return true; } @@ -490,7 +544,7 @@ bool BranchRelaxation::relaxBranchInstructions() { bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) { MF = &mf; - DEBUG(dbgs() << "***** BranchRelaxation *****\n"); + LLVM_DEBUG(dbgs() << "***** BranchRelaxation *****\n"); const TargetSubtargetInfo &ST = MF->getSubtarget(); TII = ST.getInstrInfo(); @@ -507,7 +561,7 @@ bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) { // sizes of each block. scanFunction(); - DEBUG(dbgs() << " Basic blocks before relaxation\n"; dumpBBs();); + LLVM_DEBUG(dbgs() << " Basic blocks before relaxation\n"; dumpBBs();); bool MadeChange = false; while (relaxBranchInstructions()) @@ -516,7 +570,7 @@ bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) { // After a while, this might be made debug-only, but it is not expensive. verify(); - DEBUG(dbgs() << " Basic blocks after relaxation\n\n"; dumpBBs()); + LLVM_DEBUG(dbgs() << " Basic blocks after relaxation\n\n"; dumpBBs()); BlockInfo.clear(); diff --git a/contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp b/contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp new file mode 100644 index 000000000000..7f098cb71657 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp @@ -0,0 +1,271 @@ +//==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file Break False Dependency pass. +/// +/// Some instructions have false dependencies which cause unnecessary stalls. +/// For exmaple, instructions that only write part of a register, and implicitly +/// need to read the other parts of the register. This may cause unwanted +/// stalls preventing otherwise unrelated instructions from executing in +/// parallel in an out-of-order CPU. +/// This pass is aimed at identifying and avoiding these depepndencies when +/// possible. 
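[Editorial aside, not part of the upstream diff] A classic illustration of such a false dependency, assuming an x86 target: CVTSI2SS writes only the low 32 bits of its XMM destination, so the hardware must treat it as also reading the register's old value.

    ; without dependency breaking:
    cvtsi2ss %eax, %xmm0      ; merges into %xmm0, so it cannot complete until
                              ; the previous producer of %xmm0 has finished
    ; with dependency breaking (what this pass arranges, e.g. via
    ; TII->breakPartialRegDependency or by re-targeting an undef operand):
    xorps    %xmm0, %xmm0     ; zero idiom, recognized as having no inputs
    cvtsi2ss %eax, %xmm0      ; now depends only on %eax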
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/ReachingDefAnalysis.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + + +using namespace llvm; + +namespace llvm { + +class BreakFalseDeps : public MachineFunctionPass { +private: + MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + RegisterClassInfo RegClassInfo; + + /// List of undefined register reads in this block in forward order. + std::vector<std::pair<MachineInstr *, unsigned>> UndefReads; + + /// Storage for register unit liveness. + LivePhysRegs LiveRegSet; + + ReachingDefAnalysis *RDA; + +public: + static char ID; // Pass identification, replacement for typeid + + BreakFalseDeps() : MachineFunctionPass(ID) { + initializeBreakFalseDepsPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired<ReachingDefAnalysis>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + +private: + /// Process he given basic block. + void processBasicBlock(MachineBasicBlock *MBB); + + /// Update def-ages for registers defined by MI. + /// Also break dependencies on partial defs and undef uses. + void processDefs(MachineInstr *MI); + + /// Helps avoid false dependencies on undef registers by updating the + /// machine instructions' undef operand to use a register that the instruction + /// is truly dependent on, or use a register with clearance higher than Pref. + /// Returns true if it was able to find a true dependency, thus not requiring + /// a dependency breaking instruction regardless of clearance. + bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, + unsigned Pref); + + /// Return true to if it makes sense to break dependence on a partial + /// def or undef use. + bool shouldBreakDependence(MachineInstr *, unsigned OpIdx, unsigned Pref); + + /// Break false dependencies on undefined register reads. + /// Walk the block backward computing precise liveness. This is expensive, so + /// we only do it on demand. Note that the occurrence of undefined register + /// reads that should be broken is very rare, but when they occur we may have + /// many in a single block. + void processUndefReads(MachineBasicBlock *); +}; + +} // namespace llvm + +#define DEBUG_TYPE "break-false-deps" + +char BreakFalseDeps::ID = 0; +INITIALIZE_PASS_BEGIN(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false) +INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis) +INITIALIZE_PASS_END(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false) + +FunctionPass *llvm::createBreakFalseDeps() { return new BreakFalseDeps(); } + +bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, + unsigned Pref) { + MachineOperand &MO = MI->getOperand(OpIdx); + assert(MO.isUndef() && "Expected undef machine operand"); + + unsigned OriginalReg = MO.getReg(); + + // Update only undef operands that have reg units that are mapped to one root. 
+ for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) { + unsigned NumRoots = 0; + for (MCRegUnitRootIterator Root(*Unit, TRI); Root.isValid(); ++Root) { + NumRoots++; + if (NumRoots > 1) + return false; + } + } + + // Get the undef operand's register class + const TargetRegisterClass *OpRC = + TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF); + + // If the instruction has a true dependency, we can hide the false depdency + // behind it. + for (MachineOperand &CurrMO : MI->operands()) { + if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() || + !OpRC->contains(CurrMO.getReg())) + continue; + // We found a true dependency - replace the undef register with the true + // dependency. + MO.setReg(CurrMO.getReg()); + return true; + } + + // Go over all registers in the register class and find the register with + // max clearance or clearance higher than Pref. + unsigned MaxClearance = 0; + unsigned MaxClearanceReg = OriginalReg; + ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC); + for (MCPhysReg Reg : Order) { + unsigned Clearance = RDA->getClearance(MI, Reg); + if (Clearance <= MaxClearance) + continue; + MaxClearance = Clearance; + MaxClearanceReg = Reg; + + if (MaxClearance > Pref) + break; + } + + // Update the operand if we found a register with better clearance. + if (MaxClearanceReg != OriginalReg) + MO.setReg(MaxClearanceReg); + + return false; +} + +bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, + unsigned Pref) { + unsigned reg = MI->getOperand(OpIdx).getReg(); + unsigned Clearance = RDA->getClearance(MI, reg); + LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); + + if (Pref > Clearance) { + LLVM_DEBUG(dbgs() << ": Break dependency.\n"); + return true; + } + LLVM_DEBUG(dbgs() << ": OK .\n"); + return false; +} + +void BreakFalseDeps::processDefs(MachineInstr *MI) { + assert(!MI->isDebugInstr() && "Won't process debug values"); + + // Break dependence on undef uses. Do this before updating LiveRegs below. + unsigned OpNum; + unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI); + if (Pref) { + bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref); + // We don't need to bother trying to break a dependency if this + // instruction has a true dependency on that register through another + // operand - we'll have to wait for it to be available regardless. + if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref)) + UndefReads.push_back(std::make_pair(MI, OpNum)); + } + + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, + e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isUse()) + continue; + // Check clearance before partial register updates. + unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI); + if (Pref && shouldBreakDependence(MI, i, Pref)) + TII->breakPartialRegDependency(*MI, i, TRI); + } +} + +void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) { + if (UndefReads.empty()) + return; + + // Collect this block's live out register units. + LiveRegSet.init(*TRI); + // We do not need to care about pristine registers as they are just preserved + // but not actually used in the function. 
+ LiveRegSet.addLiveOutsNoPristines(*MBB); + + MachineInstr *UndefMI = UndefReads.back().first; + unsigned OpIdx = UndefReads.back().second; + + for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) { + // Update liveness, including the current instruction's defs. + LiveRegSet.stepBackward(I); + + if (UndefMI == &I) { + if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg())) + TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI); + + UndefReads.pop_back(); + if (UndefReads.empty()) + return; + + UndefMI = UndefReads.back().first; + OpIdx = UndefReads.back().second; + } + } +} + +void BreakFalseDeps::processBasicBlock(MachineBasicBlock *MBB) { + UndefReads.clear(); + // If this block is not done, it makes little sense to make any decisions + // based on clearance information. We need to make a second pass anyway, + // and by then we'll have better information, so we can avoid doing the work + // to try and break dependencies now. + for (MachineInstr &MI : *MBB) { + if (!MI.isDebugInstr()) + processDefs(&MI); + } + processUndefReads(MBB); +} + +bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) { + if (skipFunction(mf.getFunction())) + return false; + MF = &mf; + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + RDA = &getAnalysis<ReachingDefAnalysis>(); + + RegClassInfo.runOnMachineFunction(mf); + + LLVM_DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n"); + + // Traverse the basic blocks. + for (MachineBasicBlock &MBB : mf) { + processBasicBlock(&MBB); + } + + return false; +} diff --git a/contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp new file mode 100644 index 000000000000..00ebf63fc174 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -0,0 +1,326 @@ +//===------ CFIInstrInserter.cpp - Insert additional CFI instructions -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This pass verifies incoming and outgoing CFA information of basic +/// blocks. CFA information is information about offset and register set by CFI +/// directives, valid at the start and end of a basic block. This pass checks +/// that outgoing information of predecessors matches incoming information of +/// their successors. Then it checks if blocks have correct CFA calculation rule +/// set and inserts additional CFI instruction at their beginnings if they +/// don't. CFI instructions are inserted if basic blocks have incorrect offset +/// or register set by previous blocks, as a result of a non-linear layout of +/// blocks in a function. 
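[Editorial aside, not part of the upstream diff] A hypothetical layout this pass fixes up: a cold block reached from the middle of the function is placed after the epilogue, so the textually preceding block's outgoing CFA offset (8) differs from the cold block's required incoming offset (16), and a directive must be inserted at its start.

    foo:
      .cfi_startproc
      pushq %rbp
      .cfi_def_cfa_offset 16
      ...                         # hot path
      popq  %rbp
      .cfi_def_cfa_offset 8
      retq
    .LBB0_cold:                   # branched to from the hot path, before the pop
      .cfi_def_cfa_offset 16      # inserted by CFIInstrInserter: incoming offset
      ...                         # of this block (16) != outgoing offset of the
                                  # block laid out just above it (8)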
+//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +static cl::opt<bool> VerifyCFI("verify-cfiinstrs", + cl::desc("Verify Call Frame Information instructions"), + cl::init(false), + cl::Hidden); + +namespace { +class CFIInstrInserter : public MachineFunctionPass { + public: + static char ID; + + CFIInstrInserter() : MachineFunctionPass(ID) { + initializeCFIInstrInserterPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override { + if (!MF.getMMI().hasDebugInfo() && + !MF.getFunction().needsUnwindTableEntry()) + return false; + + MBBVector.resize(MF.getNumBlockIDs()); + calculateCFAInfo(MF); + + if (VerifyCFI) { + if (unsigned ErrorNum = verify(MF)) + report_fatal_error("Found " + Twine(ErrorNum) + + " in/out CFI information errors."); + } + bool insertedCFI = insertCFIInstrs(MF); + MBBVector.clear(); + return insertedCFI; + } + + private: + struct MBBCFAInfo { + MachineBasicBlock *MBB; + /// Value of cfa offset valid at basic block entry. + int IncomingCFAOffset = -1; + /// Value of cfa offset valid at basic block exit. + int OutgoingCFAOffset = -1; + /// Value of cfa register valid at basic block entry. + unsigned IncomingCFARegister = 0; + /// Value of cfa register valid at basic block exit. + unsigned OutgoingCFARegister = 0; + /// If in/out cfa offset and register values for this block have already + /// been set or not. + bool Processed = false; + }; + + /// Contains cfa offset and register values valid at entry and exit of basic + /// blocks. + std::vector<MBBCFAInfo> MBBVector; + + /// Calculate cfa offset and register values valid at entry and exit for all + /// basic blocks in a function. + void calculateCFAInfo(MachineFunction &MF); + /// Calculate cfa offset and register values valid at basic block exit by + /// checking the block for CFI instructions. Block's incoming CFA info remains + /// the same. + void calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo); + /// Update in/out cfa offset and register values for successors of the basic + /// block. + void updateSuccCFAInfo(MBBCFAInfo &MBBInfo); + + /// Check if incoming CFA information of a basic block matches outgoing CFA + /// information of the previous block. If it doesn't, insert CFI instruction + /// at the beginning of the block that corrects the CFA calculation rule for + /// that block. + bool insertCFIInstrs(MachineFunction &MF); + /// Return the cfa offset value that should be set at the beginning of a MBB + /// if needed. The negated value is needed when creating CFI instructions that + /// set absolute offset. 
+ int getCorrectCFAOffset(MachineBasicBlock *MBB) { + return -MBBVector[MBB->getNumber()].IncomingCFAOffset; + } + + void report(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ); + /// Go through each MBB in a function and check that outgoing offset and + /// register of its predecessors match incoming offset and register of that + /// MBB, as well as that incoming offset and register of its successors match + /// outgoing offset and register of the MBB. + unsigned verify(MachineFunction &MF); +}; +} // namespace + +char CFIInstrInserter::ID = 0; +INITIALIZE_PASS(CFIInstrInserter, "cfi-instr-inserter", + "Check CFA info and insert CFI instructions if needed", false, + false) +FunctionPass *llvm::createCFIInstrInserter() { return new CFIInstrInserter(); } + +void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) { + // Initial CFA offset value i.e. the one valid at the beginning of the + // function. + int InitialOffset = + MF.getSubtarget().getFrameLowering()->getInitialCFAOffset(MF); + // Initial CFA register value i.e. the one valid at the beginning of the + // function. + unsigned InitialRegister = + MF.getSubtarget().getFrameLowering()->getInitialCFARegister(MF); + + // Initialize MBBMap. + for (MachineBasicBlock &MBB : MF) { + MBBCFAInfo MBBInfo; + MBBInfo.MBB = &MBB; + MBBInfo.IncomingCFAOffset = InitialOffset; + MBBInfo.OutgoingCFAOffset = InitialOffset; + MBBInfo.IncomingCFARegister = InitialRegister; + MBBInfo.OutgoingCFARegister = InitialRegister; + MBBVector[MBB.getNumber()] = MBBInfo; + } + + // Set in/out cfa info for all blocks in the function. This traversal is based + // on the assumption that the first block in the function is the entry block + // i.e. that it has initial cfa offset and register values as incoming CFA + // information. + for (MachineBasicBlock &MBB : MF) { + if (MBBVector[MBB.getNumber()].Processed) continue; + updateSuccCFAInfo(MBBVector[MBB.getNumber()]); + } +} + +void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { + // Outgoing cfa offset set by the block. + int SetOffset = MBBInfo.IncomingCFAOffset; + // Outgoing cfa register set by the block. + unsigned SetRegister = MBBInfo.IncomingCFARegister; + const std::vector<MCCFIInstruction> &Instrs = + MBBInfo.MBB->getParent()->getFrameInstructions(); + + // Determine cfa offset and register set by the block. + for (MachineInstr &MI : *MBBInfo.MBB) { + if (MI.isCFIInstruction()) { + unsigned CFIIndex = MI.getOperand(0).getCFIIndex(); + const MCCFIInstruction &CFI = Instrs[CFIIndex]; + switch (CFI.getOperation()) { + case MCCFIInstruction::OpDefCfaRegister: + SetRegister = CFI.getRegister(); + break; + case MCCFIInstruction::OpDefCfaOffset: + SetOffset = CFI.getOffset(); + break; + case MCCFIInstruction::OpAdjustCfaOffset: + SetOffset += CFI.getOffset(); + break; + case MCCFIInstruction::OpDefCfa: + SetRegister = CFI.getRegister(); + SetOffset = CFI.getOffset(); + break; + case MCCFIInstruction::OpRememberState: + // TODO: Add support for handling cfi_remember_state. +#ifndef NDEBUG + report_fatal_error( + "Support for cfi_remember_state not implemented! Value of CFA " + "may be incorrect!\n"); +#endif + break; + case MCCFIInstruction::OpRestoreState: + // TODO: Add support for handling cfi_restore_state. +#ifndef NDEBUG + report_fatal_error( + "Support for cfi_restore_state not implemented! Value of CFA may " + "be incorrect!\n"); +#endif + break; + // Other CFI directives do not affect CFA value. 
+ case MCCFIInstruction::OpSameValue: + case MCCFIInstruction::OpOffset: + case MCCFIInstruction::OpRelOffset: + case MCCFIInstruction::OpEscape: + case MCCFIInstruction::OpRestore: + case MCCFIInstruction::OpUndefined: + case MCCFIInstruction::OpRegister: + case MCCFIInstruction::OpWindowSave: + case MCCFIInstruction::OpGnuArgsSize: + break; + } + } + } + + MBBInfo.Processed = true; + + // Update outgoing CFA info. + MBBInfo.OutgoingCFAOffset = SetOffset; + MBBInfo.OutgoingCFARegister = SetRegister; +} + +void CFIInstrInserter::updateSuccCFAInfo(MBBCFAInfo &MBBInfo) { + SmallVector<MachineBasicBlock *, 4> Stack; + Stack.push_back(MBBInfo.MBB); + + do { + MachineBasicBlock *Current = Stack.pop_back_val(); + MBBCFAInfo &CurrentInfo = MBBVector[Current->getNumber()]; + if (CurrentInfo.Processed) + continue; + + calculateOutgoingCFAInfo(CurrentInfo); + for (auto *Succ : CurrentInfo.MBB->successors()) { + MBBCFAInfo &SuccInfo = MBBVector[Succ->getNumber()]; + if (!SuccInfo.Processed) { + SuccInfo.IncomingCFAOffset = CurrentInfo.OutgoingCFAOffset; + SuccInfo.IncomingCFARegister = CurrentInfo.OutgoingCFARegister; + Stack.push_back(Succ); + } + } + } while (!Stack.empty()); +} + +bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) { + const MBBCFAInfo *PrevMBBInfo = &MBBVector[MF.front().getNumber()]; + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + bool InsertedCFIInstr = false; + + for (MachineBasicBlock &MBB : MF) { + // Skip the first MBB in a function + if (MBB.getNumber() == MF.front().getNumber()) continue; + + const MBBCFAInfo &MBBInfo = MBBVector[MBB.getNumber()]; + auto MBBI = MBBInfo.MBB->begin(); + DebugLoc DL = MBBInfo.MBB->findDebugLoc(MBBI); + + if (PrevMBBInfo->OutgoingCFAOffset != MBBInfo.IncomingCFAOffset) { + // If both outgoing offset and register of a previous block don't match + // incoming offset and register of this block, add a def_cfa instruction + // with the correct offset and register for this block. + if (PrevMBBInfo->OutgoingCFARegister != MBBInfo.IncomingCFARegister) { + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( + nullptr, MBBInfo.IncomingCFARegister, getCorrectCFAOffset(&MBB))); + BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + // If outgoing offset of a previous block doesn't match incoming offset + // of this block, add a def_cfa_offset instruction with the correct + // offset for this block. + } else { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createDefCfaOffset( + nullptr, getCorrectCFAOffset(&MBB))); + BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + InsertedCFIInstr = true; + // If outgoing register of a previous block doesn't match incoming + // register of this block, add a def_cfa_register instruction with the + // correct register for this block. 
+ } else if (PrevMBBInfo->OutgoingCFARegister != + MBBInfo.IncomingCFARegister) { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( + nullptr, MBBInfo.IncomingCFARegister)); + BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + InsertedCFIInstr = true; + } + PrevMBBInfo = &MBBInfo; + } + return InsertedCFIInstr; +} + +void CFIInstrInserter::report(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ) { + errs() << "*** Inconsistent CFA register and/or offset between pred and succ " + "***\n"; + errs() << "Pred: " << Pred.MBB->getName() << " #" << Pred.MBB->getNumber() + << " in " << Pred.MBB->getParent()->getName() + << " outgoing CFA Reg:" << Pred.OutgoingCFARegister << "\n"; + errs() << "Pred: " << Pred.MBB->getName() << " #" << Pred.MBB->getNumber() + << " in " << Pred.MBB->getParent()->getName() + << " outgoing CFA Offset:" << Pred.OutgoingCFAOffset << "\n"; + errs() << "Succ: " << Succ.MBB->getName() << " #" << Succ.MBB->getNumber() + << " incoming CFA Reg:" << Succ.IncomingCFARegister << "\n"; + errs() << "Succ: " << Succ.MBB->getName() << " #" << Succ.MBB->getNumber() + << " incoming CFA Offset:" << Succ.IncomingCFAOffset << "\n"; +} + +unsigned CFIInstrInserter::verify(MachineFunction &MF) { + unsigned ErrorNum = 0; + for (auto *CurrMBB : depth_first(&MF)) { + const MBBCFAInfo &CurrMBBInfo = MBBVector[CurrMBB->getNumber()]; + for (MachineBasicBlock *Succ : CurrMBB->successors()) { + const MBBCFAInfo &SuccMBBInfo = MBBVector[Succ->getNumber()]; + // Check that incoming offset and register values of successors match the + // outgoing offset and register values of CurrMBB + if (SuccMBBInfo.IncomingCFAOffset != CurrMBBInfo.OutgoingCFAOffset || + SuccMBBInfo.IncomingCFARegister != CurrMBBInfo.OutgoingCFARegister) { + report(CurrMBBInfo, SuccMBBInfo); + ErrorNum++; + } + } + } + return ErrorNum; +} diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp index b8920a601938..57541182cab2 100644 --- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -35,8 +35,8 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, const MachineLoopInfo &MLI, const MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo::NormalizingFn norm) { - DEBUG(dbgs() << "********** Compute Spill Weights **********\n" - << "********** Function: " << MF.getName() << '\n'); + LLVM_DEBUG(dbgs() << "********** Compute Spill Weights **********\n" + << "********** Function: " << MF.getName() << '\n'); MachineRegisterInfo &MRI = MF.getRegInfo(); VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm); @@ -236,7 +236,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, continue; numInstr++; - if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue()) + if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugInstr()) continue; if (!visited.insert(mi).second) continue; diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp index 879cd2859ee9..2f845354c570 100644 --- a/contrib/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -23,11 +23,14 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAtomicExpandPass(Registry); initializeBranchFolderPassPass(Registry); initializeBranchRelaxationPass(Registry); + initializeCFIInstrInserterPass(Registry); initializeCodeGenPreparePass(Registry); initializeDeadMachineInstructionElimPass(Registry); 
initializeDetectDeadLanesPass(Registry); initializeDwarfEHPreparePass(Registry); initializeEarlyIfConverterPass(Registry); + initializeEarlyMachineLICMPass(Registry); + initializeEarlyTailDuplicatePass(Registry); initializeExpandISelPseudosPass(Registry); initializeExpandMemCmpPassPass(Registry); initializeExpandPostRAPass(Registry); @@ -48,6 +51,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLiveVariablesPass(Registry); initializeLocalStackSlotPassPass(Registry); initializeLowerIntrinsicsPass(Registry); + initializeMIRCanonicalizerPass(Registry); initializeMachineBlockFrequencyInfoPass(Registry); initializeMachineBlockPlacementPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); @@ -74,12 +78,15 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePeepholeOptimizerPass(Registry); initializePostMachineSchedulerPass(Registry); initializePostRAHazardRecognizerPass(Registry); + initializePostRAMachineSinkingPass(Registry); initializePostRASchedulerPass(Registry); initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeProcessImplicitDefsPass(Registry); initializeRABasicPass(Registry); - initializeRegAllocFastPass(Registry); initializeRAGreedyPass(Registry); + initializeRegAllocFastPass(Registry); + initializeRegUsageInfoCollectorPass(Registry); + initializeRegUsageInfoPropagationPass(Registry); initializeRegisterCoalescerPass(Registry); initializeRenameIndependentSubregsPass(Registry); initializeSafeStackLegacyPassPass(Registry); @@ -90,7 +97,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeStackMapLivenessPass(Registry); initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); - initializeTailDuplicatePassPass(Registry); + initializeTailDuplicatePass(Registry); initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionPassPass(Registry); initializeUnpackMachineBundlesPass(Registry); @@ -98,9 +105,9 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeUnreachableMachineBlockElimPass(Registry); initializeVirtRegMapPass(Registry); initializeVirtRegRewriterPass(Registry); + initializeWasmEHPreparePass(Registry); initializeWinEHPreparePass(Registry); initializeXRayInstrumentationPass(Registry); - initializeMIRCanonicalizerPass(Registry); } void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index 26ca8d4ee88c..c41beb094604 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -30,15 +30,16 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -79,13 +80,13 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include 
"llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BypassSlowDivision.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include <algorithm> #include <cassert> @@ -196,7 +197,7 @@ AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking.")); static cl::opt<bool> -AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(false), +AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking.")); static cl::opt<bool> AddrSinkCombineBaseReg( @@ -215,6 +216,11 @@ static cl::opt<bool> AddrSinkCombineScaledReg( "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking.")); +static cl::opt<bool> + EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, + cl::init(true), + cl::desc("Enable splitting large offset of GEP.")); + namespace { using SetOfInstrs = SmallPtrSet<Instruction *, 16>; @@ -260,6 +266,20 @@ class TypePromotionTransaction; /// Keep track of sext chains based on their initial value. DenseMap<Value *, Instruction *> SeenChainsForSExt; + /// Keep track of GEPs accessing the same data structures such as structs or + /// arrays that are candidates to be split later because of their large + /// size. + DenseMap< + AssertingVH<Value>, + SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>> + LargeOffsetGEPMap; + + /// Keep track of new GEP base after splitting the GEPs having large offset. + SmallSet<AssertingVH<Value>, 2> NewGEPBases; + + /// Map serial numbers to Large offset GEPs. + DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID; + /// Keep track of SExt promoted. 
ValueToSExts ValToSExtendedUses; @@ -301,16 +321,16 @@ class TypePromotionTransaction; bool isPreheader); bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT); bool optimizeInst(Instruction *I, bool &ModifiedDT); - bool optimizeMemoryInst(Instruction *I, Value *Addr, - Type *AccessTy, unsigned AS); + bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, + Type *AccessTy, unsigned AddrSpace); bool optimizeInlineAsmInst(CallInst *CS); bool optimizeCallInst(CallInst *CI, bool &ModifiedDT); bool optimizeExt(Instruction *&I); bool optimizeExtUses(Instruction *I); - bool optimizeLoadExt(LoadInst *I); + bool optimizeLoadExt(LoadInst *Load); bool optimizeSelectInst(SelectInst *SI); - bool optimizeShuffleVectorInst(ShuffleVectorInst *SI); - bool optimizeSwitchInst(SwitchInst *CI); + bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI); + bool optimizeSwitchInst(SwitchInst *SI); bool optimizeExtractElementInst(Instruction *Inst); bool dupRetToEnableTailCallOpts(BasicBlock *BB); bool placeDbgValues(Function &F); @@ -321,6 +341,7 @@ class TypePromotionTransaction; SmallVectorImpl<Instruction *> &ProfitablyMovedExts, unsigned CreatedInstsCost = 0); bool mergeSExts(Function &F); + bool splitLargeGEPOffsets(); bool performAddressTypePromotion( Instruction *&Inst, bool AllowPromotionWithoutCommonHeader, @@ -414,6 +435,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { SeenChainsForSExt.clear(); ValToSExtendedUses.clear(); RemovedInsts.clear(); + LargeOffsetGEPMap.clear(); + LargeOffsetGEPID.clear(); for (Function::iterator I = F.begin(); I != F.end(); ) { BasicBlock *BB = &*I++; bool ModifiedDTOnIteration = false; @@ -425,6 +448,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { } if (EnableTypePromotionMerge && !ValToSExtendedUses.empty()) MadeChange |= mergeSExts(F); + if (!LargeOffsetGEPMap.empty()) + MadeChange |= splitLargeGEPOffsets(); // Really free removed instructions during promotion. for (Instruction *I : RemovedInsts) @@ -437,7 +462,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) { if (!DisableBranchOpts) { MadeChange = false; - SmallPtrSet<BasicBlock*, 8> WorkList; + // Use a set vector to get deterministic iteration order. The order the + // blocks are removed may affect whether or not PHI nodes in successors + // are removed. + SmallSetVector<BasicBlock*, 8> WorkList; for (BasicBlock &BB : F) { SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB)); MadeChange |= ConstantFoldTerminator(&BB, true); @@ -452,8 +480,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Delete the dead blocks and any of their dead successors. MadeChange |= !WorkList.empty(); while (!WorkList.empty()) { - BasicBlock *BB = *WorkList.begin(); - WorkList.erase(BB); + BasicBlock *BB = WorkList.pop_back_val(); SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); DeleteDeadBlock(BB); @@ -491,8 +518,16 @@ bool CodeGenPrepare::runOnFunction(Function &F) { bool CodeGenPrepare::eliminateFallThrough(Function &F) { bool Changed = false; // Scan all of the blocks in the function, except for the entry block. - for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { - BasicBlock *BB = &*I++; + // Use a temporary array to avoid iterator being invalidated when + // deleting blocks. 
+ SmallVector<WeakTrackingVH, 16> Blocks; + for (auto &Block : llvm::make_range(std::next(F.begin()), F.end())) + Blocks.push_back(&Block); + + for (auto &Block : Blocks) { + auto *BB = cast_or_null<BasicBlock>(Block); + if (!BB) + continue; // If the destination block has a single pred, then this is a trivial // edge, just collapse it. BasicBlock *SinglePred = BB->getSinglePredecessor(); @@ -503,17 +538,10 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) { BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); if (Term && !Term->isConditional()) { Changed = true; - DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n"); - // Remember if SinglePred was the entry block of the function. - // If so, we will need to move BB back to the entry position. - bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(BB, nullptr); - - if (isEntry && BB != &BB->getParent()->getEntryBlock()) - BB->moveBefore(&BB->getParent()->getEntryBlock()); + LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n"); - // We have erased a block. Update the iterator. - I = BB->getIterator(); + // Merge BB into SinglePred and delete it. + MergeBlockIntoPredecessor(BB); } } return Changed; @@ -566,9 +594,17 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { } bool MadeChange = false; + // Copy blocks into a temporary array to avoid iterator invalidation issues + // as we remove them. // Note that this intentionally skips the entry block. - for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { - BasicBlock *BB = &*I++; + SmallVector<WeakTrackingVH, 16> Blocks; + for (auto &Block : llvm::make_range(std::next(F.begin()), F.end())) + Blocks.push_back(&Block); + + for (auto &Block : Blocks) { + BasicBlock *BB = cast_or_null<BasicBlock>(Block); + if (!BB) + continue; BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB); if (!DestBB || !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB))) @@ -730,21 +766,20 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); BasicBlock *DestBB = BI->getSuccessor(0); - DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); + LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" + << *BB << *DestBB); // If the destination block has a single pred, then this is a trivial edge, // just collapse it. if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { if (SinglePred != DestBB) { - // Remember if SinglePred was the entry block of the function. If so, we - // will need to move BB back to the entry position. - bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(DestBB, nullptr); - - if (isEntry && BB != &BB->getParent()->getEntryBlock()) - BB->moveBefore(&BB->getParent()->getEntryBlock()); - - DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); + assert(SinglePred == BB && + "Single predecessor not the same as predecessor"); + // Merge DestBB into SinglePred/BB and delete it. + MergeBlockIntoPredecessor(DestBB); + // Note: BB(=SinglePred) will not be deleted on this path. + // DestBB(=its single successor) is the one that was deleted. 
+ LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n"); return; } } @@ -782,7 +817,7 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { BB->eraseFromParent(); ++NumBlocksElim; - DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); + LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } // Computes a map of base pointer relocation instructions to corresponding @@ -1024,6 +1059,7 @@ static bool SinkCast(CastInst *CI) { assert(InsertPt != UserBB->end()); InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", &*InsertPt); + InsertedCast->setDebugLoc(CI->getDebugLoc()); } // Replace a use of the cast with a use of the new cast. @@ -1247,8 +1283,8 @@ static bool sinkAndCmp0Expression(Instruction *AndI, if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI)) return false; - DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n"); - DEBUG(AndI->getParent()->dump()); + LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n"); + LLVM_DEBUG(AndI->getParent()->dump()); // Push the 'and' into the same block as the icmp 0. There should only be // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any @@ -1261,7 +1297,7 @@ static bool sinkAndCmp0Expression(Instruction *AndI, // Preincrement use iterator so we don't invalidate it. ++UI; - DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n"); + LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n"); // Keep the 'and' in the same place if the use is already in the same block. Instruction *InsertPt = @@ -1275,7 +1311,7 @@ static bool sinkAndCmp0Expression(Instruction *AndI, // Replace a use of the 'and' with a use of the new 'and'. TheUse = InsertedAnd; ++NumAndUses; - DEBUG(User->getParent()->dump()); + LLVM_DEBUG(User->getParent()->dump()); } // We removed all uses, nuke the and. @@ -1388,7 +1424,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, /// %x.extract.shift.1 = lshr i64 %arg1, 32 /// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16 /// -/// CodeGen will recoginze the pattern in BB2 and generate BitExtract +/// CodeGen will recognize the pattern in BB2 and generate BitExtract /// instruction. /// Return true if any changes are made. static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, @@ -1434,7 +1470,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, // cmp i16 trunc.result, opnd2 // if (isa<TruncInst>(User) && shiftIsLegal - // If the type of the truncate is legal, no trucate will be + // If the type of the truncate is legal, no truncate will be // introduced in other basic blocks. && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType())))) @@ -1581,7 +1617,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // if size - offset meets the size threshold. 
if (!Arg->getType()->isPointerTy()) continue; - APInt Offset(DL->getPointerSizeInBits( + APInt Offset(DL->getIndexSizeInBits( cast<PointerType>(Arg->getType())->getAddressSpace()), 0); Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset); @@ -1606,11 +1642,14 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // If this is a memcpy (or similar) then we may be able to improve the // alignment if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) { - unsigned Align = getKnownAlignment(MI->getDest(), *DL); - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) - Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL)); - if (Align > MI->getAlignment()) - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align)); + unsigned DestAlign = getKnownAlignment(MI->getDest(), *DL); + if (DestAlign > MI->getDestAlignment()) + MI->setDestAlignment(DestAlign); + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { + unsigned SrcAlign = getKnownAlignment(MTI->getSource(), *DL); + if (SrcAlign > MTI->getSourceAlignment()) + MTI->setSourceAlignment(SrcAlign); + } } } @@ -1664,7 +1703,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { InsertedInsts.insert(ExtVal); return true; } - case Intrinsic::invariant_group_barrier: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: II->replaceAllUsesWith(II->getArgOperand(0)); II->eraseFromParent(); return true; @@ -2018,11 +2058,11 @@ LLVM_DUMP_METHOD void ExtAddrMode::dump() const { namespace { -/// \brief This class provides transaction based operation on the IR. +/// This class provides transaction based operation on the IR. /// Every change made through this class is recorded in the internal state and /// can be undone (rollback) until commit is called. class TypePromotionTransaction { - /// \brief This represents the common interface of the individual transaction. + /// This represents the common interface of the individual transaction. /// Each class implements the logic for doing one specific modification on /// the IR via the TypePromotionTransaction. class TypePromotionAction { @@ -2031,20 +2071,20 @@ class TypePromotionTransaction { Instruction *Inst; public: - /// \brief Constructor of the action. + /// Constructor of the action. /// The constructor performs the related action on the IR. TypePromotionAction(Instruction *Inst) : Inst(Inst) {} virtual ~TypePromotionAction() = default; - /// \brief Undo the modification done by this action. + /// Undo the modification done by this action. /// When this method is called, the IR must be in the same state as it was /// before this action was applied. /// \pre Undoing the action works if and only if the IR is in the exact same /// state as it was directly after this action was applied. virtual void undo() = 0; - /// \brief Advocate every change made by this action. + /// Advocate every change made by this action. /// When the results on the IR of the action are to be kept, it is important /// to call this function, otherwise hidden information may be kept forever. virtual void commit() { @@ -2052,12 +2092,12 @@ class TypePromotionTransaction { } }; - /// \brief Utility to remember the position of an instruction. + /// Utility to remember the position of an instruction. class InsertionHandler { /// Position of an instruction. /// Either an instruction: /// - Is the first in a basic block: BB is used. - /// - Has a previous instructon: PrevInst is used. 
+ /// - Has a previous instruction: PrevInst is used. union { Instruction *PrevInst; BasicBlock *BB; @@ -2067,7 +2107,7 @@ class TypePromotionTransaction { bool HasPrevInstruction; public: - /// \brief Record the position of \p Inst. + /// Record the position of \p Inst. InsertionHandler(Instruction *Inst) { BasicBlock::iterator It = Inst->getIterator(); HasPrevInstruction = (It != (Inst->getParent()->begin())); @@ -2077,7 +2117,7 @@ class TypePromotionTransaction { Point.BB = Inst->getParent(); } - /// \brief Insert \p Inst at the recorded position. + /// Insert \p Inst at the recorded position. void insert(Instruction *Inst) { if (HasPrevInstruction) { if (Inst->getParent()) @@ -2093,27 +2133,28 @@ class TypePromotionTransaction { } }; - /// \brief Move an instruction before another. + /// Move an instruction before another. class InstructionMoveBefore : public TypePromotionAction { /// Original position of the instruction. InsertionHandler Position; public: - /// \brief Move \p Inst before \p Before. + /// Move \p Inst before \p Before. InstructionMoveBefore(Instruction *Inst, Instruction *Before) : TypePromotionAction(Inst), Position(Inst) { - DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n"); + LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before + << "\n"); Inst->moveBefore(Before); } - /// \brief Move the instruction back to its original position. + /// Move the instruction back to its original position. void undo() override { - DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n"); + LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n"); Position.insert(Inst); } }; - /// \brief Set the operand of an instruction with a new value. + /// Set the operand of an instruction with a new value. class OperandSetter : public TypePromotionAction { /// Original operand of the instruction. Value *Origin; @@ -2122,35 +2163,35 @@ class TypePromotionTransaction { unsigned Idx; public: - /// \brief Set \p Idx operand of \p Inst with \p NewVal. + /// Set \p Idx operand of \p Inst with \p NewVal. OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal) : TypePromotionAction(Inst), Idx(Idx) { - DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n" - << "for:" << *Inst << "\n" - << "with:" << *NewVal << "\n"); + LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n" + << "for:" << *Inst << "\n" + << "with:" << *NewVal << "\n"); Origin = Inst->getOperand(Idx); Inst->setOperand(Idx, NewVal); } - /// \brief Restore the original value of the instruction. + /// Restore the original value of the instruction. void undo() override { - DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n" - << "for: " << *Inst << "\n" - << "with: " << *Origin << "\n"); + LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n" + << "for: " << *Inst << "\n" + << "with: " << *Origin << "\n"); Inst->setOperand(Idx, Origin); } }; - /// \brief Hide the operands of an instruction. + /// Hide the operands of an instruction. /// Do as if this instruction was not using any of its operands. class OperandsHider : public TypePromotionAction { /// The list of original operands. SmallVector<Value *, 4> OriginalValues; public: - /// \brief Remove \p Inst from the uses of the operands of \p Inst. + /// Remove \p Inst from the uses of the operands of \p Inst. 
OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) { - DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n"); + LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n"); unsigned NumOpnds = Inst->getNumOperands(); OriginalValues.reserve(NumOpnds); for (unsigned It = 0; It < NumOpnds; ++It) { @@ -2164,114 +2205,114 @@ class TypePromotionTransaction { } } - /// \brief Restore the original list of uses. + /// Restore the original list of uses. void undo() override { - DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n"); + LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n"); for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It) Inst->setOperand(It, OriginalValues[It]); } }; - /// \brief Build a truncate instruction. + /// Build a truncate instruction. class TruncBuilder : public TypePromotionAction { Value *Val; public: - /// \brief Build a truncate instruction of \p Opnd producing a \p Ty + /// Build a truncate instruction of \p Opnd producing a \p Ty /// result. /// trunc Opnd to Ty. TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) { IRBuilder<> Builder(Opnd); Val = Builder.CreateTrunc(Opnd, Ty, "promoted"); - DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n"); + LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n"); } - /// \brief Get the built value. + /// Get the built value. Value *getBuiltValue() { return Val; } - /// \brief Remove the built instruction. + /// Remove the built instruction. void undo() override { - DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n"); + LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n"); if (Instruction *IVal = dyn_cast<Instruction>(Val)) IVal->eraseFromParent(); } }; - /// \brief Build a sign extension instruction. + /// Build a sign extension instruction. class SExtBuilder : public TypePromotionAction { Value *Val; public: - /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty + /// Build a sign extension instruction of \p Opnd producing a \p Ty /// result. /// sext Opnd to Ty. SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) : TypePromotionAction(InsertPt) { IRBuilder<> Builder(InsertPt); Val = Builder.CreateSExt(Opnd, Ty, "promoted"); - DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n"); + LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n"); } - /// \brief Get the built value. + /// Get the built value. Value *getBuiltValue() { return Val; } - /// \brief Remove the built instruction. + /// Remove the built instruction. void undo() override { - DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n"); + LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n"); if (Instruction *IVal = dyn_cast<Instruction>(Val)) IVal->eraseFromParent(); } }; - /// \brief Build a zero extension instruction. + /// Build a zero extension instruction. class ZExtBuilder : public TypePromotionAction { Value *Val; public: - /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty + /// Build a zero extension instruction of \p Opnd producing a \p Ty /// result. /// zext Opnd to Ty. ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) : TypePromotionAction(InsertPt) { IRBuilder<> Builder(InsertPt); Val = Builder.CreateZExt(Opnd, Ty, "promoted"); - DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n"); + LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n"); } - /// \brief Get the built value. + /// Get the built value. Value *getBuiltValue() { return Val; } - /// \brief Remove the built instruction. 
+ /// Remove the built instruction. void undo() override { - DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n"); + LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n"); if (Instruction *IVal = dyn_cast<Instruction>(Val)) IVal->eraseFromParent(); } }; - /// \brief Mutate an instruction to another type. + /// Mutate an instruction to another type. class TypeMutator : public TypePromotionAction { /// Record the original type. Type *OrigTy; public: - /// \brief Mutate the type of \p Inst into \p NewTy. + /// Mutate the type of \p Inst into \p NewTy. TypeMutator(Instruction *Inst, Type *NewTy) : TypePromotionAction(Inst), OrigTy(Inst->getType()) { - DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy - << "\n"); + LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy + << "\n"); Inst->mutateType(NewTy); } - /// \brief Mutate the instruction back to its original type. + /// Mutate the instruction back to its original type. void undo() override { - DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy - << "\n"); + LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy + << "\n"); Inst->mutateType(OrigTy); } }; - /// \brief Replace the uses of an instruction by another instruction. + /// Replace the uses of an instruction by another instruction. class UsesReplacer : public TypePromotionAction { /// Helper structure to keep track of the replaced uses. struct InstructionAndIdx { @@ -2291,10 +2332,10 @@ class TypePromotionTransaction { using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator; public: - /// \brief Replace all the use of \p Inst by \p New. + /// Replace all the use of \p Inst by \p New. UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) { - DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New - << "\n"); + LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New + << "\n"); // Record the original uses. for (Use &U : Inst->uses()) { Instruction *UserI = cast<Instruction>(U.getUser()); @@ -2304,9 +2345,9 @@ class TypePromotionTransaction { Inst->replaceAllUsesWith(New); } - /// \brief Reassign the original uses of Inst to Inst. + /// Reassign the original uses of Inst to Inst. void undo() override { - DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n"); + LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n"); for (use_iterator UseIt = OriginalUses.begin(), EndIt = OriginalUses.end(); UseIt != EndIt; ++UseIt) { @@ -2315,7 +2356,7 @@ class TypePromotionTransaction { } }; - /// \brief Remove an instruction from the IR. + /// Remove an instruction from the IR. class InstructionRemover : public TypePromotionAction { /// Original position of the instruction. InsertionHandler Inserter; @@ -2331,7 +2372,7 @@ class TypePromotionTransaction { SetOfInstrs &RemovedInsts; public: - /// \brief Remove all reference of \p Inst and optinally replace all its + /// Remove all reference of \p Inst and optionally replace all its /// uses with New. /// \p RemovedInsts Keep track of the instructions removed by this Action. 
/// \pre If !Inst->use_empty(), then New != nullptr @@ -2341,7 +2382,7 @@ class TypePromotionTransaction { RemovedInsts(RemovedInsts) { if (New) Replacer = new UsesReplacer(Inst, New); - DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n"); + LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n"); RemovedInsts.insert(Inst); /// The instructions removed here will be freed after completing /// optimizeBlock() for all blocks as we need to keep track of the @@ -2351,10 +2392,10 @@ class TypePromotionTransaction { ~InstructionRemover() override { delete Replacer; } - /// \brief Resurrect the instruction and reassign it to the proper uses if + /// Resurrect the instruction and reassign it to the proper uses if /// new value was provided when build this action. void undo() override { - DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n"); + LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n"); Inserter.insert(Inst); if (Replacer) Replacer->undo(); @@ -2496,7 +2537,7 @@ void TypePromotionTransaction::rollback( namespace { -/// \brief A helper class for matching addressing modes. +/// A helper class for matching addressing modes. /// /// This encapsulates the logic for matching the target-legal addressing modes. class AddressingModeMatcher { @@ -2524,22 +2565,23 @@ class AddressingModeMatcher { /// The ongoing transaction where every action should be registered. TypePromotionTransaction &TPT; + // A GEP which has too large offset to be folded into the addressing mode. + std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP; + /// This is set to true when we should not do profitability checks. /// When true, IsProfitableToFoldIntoAddressingMode always returns true. bool IgnoreProfitability; - AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI, - const TargetLowering &TLI, - const TargetRegisterInfo &TRI, - Type *AT, unsigned AS, - Instruction *MI, ExtAddrMode &AM, - const SetOfInstrs &InsertedInsts, - InstrToOrigTy &PromotedInsts, - TypePromotionTransaction &TPT) + AddressingModeMatcher( + SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI, + const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI, + ExtAddrMode &AM, const SetOfInstrs &InsertedInsts, + InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT, + std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) : AddrModeInsts(AMI), TLI(TLI), TRI(TRI), DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts), - PromotedInsts(PromotedInsts), TPT(TPT) { + PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) { IgnoreProfitability = false; } @@ -2551,28 +2593,27 @@ public: /// optimizations. /// \p PromotedInsts maps the instructions to their type before promotion. /// \p The ongoing transaction where every action should be registered. 
- static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS, - Instruction *MemoryInst, - SmallVectorImpl<Instruction*> &AddrModeInsts, - const TargetLowering &TLI, - const TargetRegisterInfo &TRI, - const SetOfInstrs &InsertedInsts, - InstrToOrigTy &PromotedInsts, - TypePromotionTransaction &TPT) { + static ExtAddrMode + Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst, + SmallVectorImpl<Instruction *> &AddrModeInsts, + const TargetLowering &TLI, const TargetRegisterInfo &TRI, + const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, + TypePromotionTransaction &TPT, + std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) { ExtAddrMode Result; - bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, - AccessTy, AS, + bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS, MemoryInst, Result, InsertedInsts, - PromotedInsts, TPT).matchAddr(V, 0); + PromotedInsts, TPT, LargeOffsetGEP) + .matchAddr(V, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); return Result; } private: bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); - bool matchAddr(Value *V, unsigned Depth); - bool matchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, + bool matchAddr(Value *Addr, unsigned Depth); + bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth, bool *MovedAway = nullptr); bool isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, @@ -2582,20 +2623,21 @@ private: Value *PromotedOperand) const; }; -/// \brief Keep track of simplification of Phi nodes. +/// Keep track of simplification of Phi nodes. /// Accept the set of all phi nodes and erase phi node from this set /// if it is simplified. class SimplificationTracker { DenseMap<Value *, Value *> Storage; const SimplifyQuery &SQ; - SmallPtrSetImpl<PHINode *> &AllPhiNodes; - SmallPtrSetImpl<SelectInst *> &AllSelectNodes; + // Tracks newly created Phi nodes. We use a SetVector to get deterministic + // order when iterating over the set in MatchPhiSet. + SmallSetVector<PHINode *, 32> AllPhiNodes; + // Tracks newly created Select nodes. 
+ SmallPtrSet<SelectInst *, 32> AllSelectNodes; public: - SimplificationTracker(const SimplifyQuery &sq, - SmallPtrSetImpl<PHINode *> &APN, - SmallPtrSetImpl<SelectInst *> &ASN) - : SQ(sq), AllPhiNodes(APN), AllSelectNodes(ASN) {} + SimplificationTracker(const SimplifyQuery &sq) + : SQ(sq) {} Value *Get(Value *V) { do { @@ -2621,7 +2663,7 @@ public: Put(PI, V); PI->replaceAllUsesWith(V); if (auto *PHI = dyn_cast<PHINode>(PI)) - AllPhiNodes.erase(PHI); + AllPhiNodes.remove(PHI); if (auto *Select = dyn_cast<SelectInst>(PI)) AllSelectNodes.erase(Select); PI->eraseFromParent(); @@ -2633,9 +2675,48 @@ public: void Put(Value *From, Value *To) { Storage.insert({ From, To }); } + + void ReplacePhi(PHINode *From, PHINode *To) { + Value* OldReplacement = Get(From); + while (OldReplacement != From) { + From = To; + To = dyn_cast<PHINode>(OldReplacement); + OldReplacement = Get(From); + } + assert(Get(To) == To && "Replacement PHI node is already replaced."); + Put(From, To); + From->replaceAllUsesWith(To); + AllPhiNodes.remove(From); + From->eraseFromParent(); + } + + SmallSetVector<PHINode *, 32>& newPhiNodes() { return AllPhiNodes; } + + void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); } + + void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); } + + unsigned countNewPhiNodes() const { return AllPhiNodes.size(); } + + unsigned countNewSelectNodes() const { return AllSelectNodes.size(); } + + void destroyNewNodes(Type *CommonType) { + // For safe erasing, replace the uses with dummy value first. + auto Dummy = UndefValue::get(CommonType); + for (auto I : AllPhiNodes) { + I->replaceAllUsesWith(Dummy); + I->eraseFromParent(); + } + AllPhiNodes.clear(); + for (auto I : AllSelectNodes) { + I->replaceAllUsesWith(Dummy); + I->eraseFromParent(); + } + AllSelectNodes.clear(); + } }; -/// \brief A helper class for combining addressing modes. +/// A helper class for combining addressing modes. class AddressingModeCombiner { typedef std::pair<Value *, BasicBlock *> ValueInBB; typedef DenseMap<ValueInBB, Value *> FoldAddrToValueMapping; @@ -2664,12 +2745,12 @@ public: AddressingModeCombiner(const SimplifyQuery &_SQ, ValueInBB OriginalValue) : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {} - /// \brief Get the combined AddrMode + /// Get the combined AddrMode const ExtAddrMode &getAddrMode() const { return AddrModes[0]; } - /// \brief Add a new AddrMode if it's compatible with the AddrModes we already + /// Add a new AddrMode if it's compatible with the AddrModes we already /// have. /// \return True iff we succeeded in doing so. bool addNewAddrMode(ExtAddrMode &NewAddrMode) { @@ -2694,29 +2775,35 @@ public: else if (DifferentField != ThisDifferentField) DifferentField = ExtAddrMode::MultipleFields; - // If NewAddrMode differs in only one dimension, and that dimension isn't - // the amount that ScaledReg is scaled by, then we can handle it by - // inserting a phi/select later on. Even if NewAddMode is the same - // we still need to collect it due to original value is different. - // And later we will need all original values as anchors during - // finding the common Phi node. + // If NewAddrMode differs in more than one dimension we cannot handle it. + bool CanHandle = DifferentField != ExtAddrMode::MultipleFields; + + // If Scale Field is different then we reject. 
+ CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField; + // We also must reject the case when base offset is different and // scale reg is not null, we cannot handle this case due to merge of // different offsets will be used as ScaleReg. - if (DifferentField != ExtAddrMode::MultipleFields && - DifferentField != ExtAddrMode::ScaleField && - (DifferentField != ExtAddrMode::BaseOffsField || - !NewAddrMode.ScaledReg)) { + CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField || + !NewAddrMode.ScaledReg); + + // We also must reject the case when GV is different and BaseReg installed + // due to we want to use base reg as a merge of GV values. + CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField || + !NewAddrMode.HasBaseReg); + + // Even if NewAddMode is the same we still need to collect it due to + // original value is different. And later we will need all original values + // as anchors during finding the common Phi node. + if (CanHandle) AddrModes.emplace_back(NewAddrMode); - return true; - } + else + AddrModes.clear(); - // We couldn't combine NewAddrMode with the rest, so return failure. - AddrModes.clear(); - return false; + return CanHandle; } - /// \brief Combine the addressing modes we've collected into a single + /// Combine the addressing modes we've collected into a single /// addressing mode. /// \return True iff we successfully combined them or we only had one so /// didn't need to combine them anyway. @@ -2751,7 +2838,7 @@ public: } private: - /// \brief Initialize Map with anchor values. For address seen in some BB + /// Initialize Map with anchor values. For address seen in some BB /// we set the value of different field saw in this address. /// If address is not an instruction than basic block is set to null. /// At the same time we find a common type for different field we will @@ -2784,9 +2871,9 @@ private: return true; } - /// \brief We have mapping between value A and basic block where value A + /// We have mapping between value A and basic block where value A /// seen to other value B where B was a field in addressing mode represented - /// by A. Also we have an original value C representin an address in some + /// by A. Also we have an original value C representing an address in some /// basic block. Traversing from C through phi and selects we ended up with /// A's in a map. This utility function tries to find a value V which is a /// field in addressing mode C and traversing through phi nodes and selects @@ -2809,62 +2896,46 @@ private: // <p, BB3> -> ? // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3 Value *findCommon(FoldAddrToValueMapping &Map) { - // Tracks of new created Phi nodes. - SmallPtrSet<PHINode *, 32> NewPhiNodes; - // Tracks of new created Select nodes. - SmallPtrSet<SelectInst *, 32> NewSelectNodes; - // Tracks the simplification of new created phi nodes. The reason we use + // Tracks the simplification of newly created phi nodes. The reason we use // this mapping is because we will add new created Phi nodes in AddrToBase. // Simplification of Phi nodes is recursive, so some Phi node may // be simplified after we added it to AddrToBase. // Using this mapping we can find the current value in AddrToBase. - SimplificationTracker ST(SQ, NewPhiNodes, NewSelectNodes); + SimplificationTracker ST(SQ); // First step, DFS to create PHI nodes for all intermediate blocks. // Also fill traverse order for the second step. 
SmallVector<ValueInBB, 32> TraverseOrder; - InsertPlaceholders(Map, TraverseOrder, NewPhiNodes, NewSelectNodes); + InsertPlaceholders(Map, TraverseOrder, ST); // Second Step, fill new nodes by merged values and simplify if possible. FillPlaceholders(Map, TraverseOrder, ST); - if (!AddrSinkNewSelects && NewSelectNodes.size() > 0) { - DestroyNodes(NewPhiNodes); - DestroyNodes(NewSelectNodes); + if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) { + ST.destroyNewNodes(CommonType); return nullptr; } // Now we'd like to match New Phi nodes to existed ones. unsigned PhiNotMatchedCount = 0; - if (!MatchPhiSet(NewPhiNodes, ST, AddrSinkNewPhis, PhiNotMatchedCount)) { - DestroyNodes(NewPhiNodes); - DestroyNodes(NewSelectNodes); + if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) { + ST.destroyNewNodes(CommonType); return nullptr; } auto *Result = ST.Get(Map.find(Original)->second); if (Result) { - NumMemoryInstsPhiCreated += NewPhiNodes.size() + PhiNotMatchedCount; - NumMemoryInstsSelectCreated += NewSelectNodes.size(); + NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount; + NumMemoryInstsSelectCreated += ST.countNewSelectNodes(); } return Result; } - /// \brief Destroy nodes from a set. - template <typename T> void DestroyNodes(SmallPtrSetImpl<T *> &Instructions) { - // For safe erasing, replace the Phi with dummy value first. - auto Dummy = UndefValue::get(CommonType); - for (auto I : Instructions) { - I->replaceAllUsesWith(Dummy); - I->eraseFromParent(); - } - } - - /// \brief Try to match PHI node to Candidate. + /// Try to match PHI node to Candidate. /// Matcher tracks the matched Phi nodes. bool MatchPhiNode(PHINode *PHI, PHINode *Candidate, - DenseSet<PHIPair> &Matcher, - SmallPtrSetImpl<PHINode *> &PhiNodesToMatch) { + SmallSetVector<PHIPair, 8> &Matcher, + SmallSetVector<PHINode *, 32> &PhiNodesToMatch) { SmallVector<PHIPair, 8> WorkList; Matcher.insert({ PHI, Candidate }); WorkList.push_back({ PHI, Candidate }); @@ -2908,13 +2979,16 @@ private: return true; } - /// \brief For the given set of PHI nodes try to find their equivalents. + /// For the given set of PHI nodes (in the SimplificationTracker) try + /// to find their equivalents. /// Returns false if this matching fails and creation of new Phi is disabled. - bool MatchPhiSet(SmallPtrSetImpl<PHINode *> &PhiNodesToMatch, - SimplificationTracker &ST, bool AllowNewPhiNodes, + bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes, unsigned &PhiNotMatchedCount) { - DenseSet<PHIPair> Matched; + // Use a SetVector for Matched to make sure we do replacements (ReplacePhi) + // in a deterministic order below. + SmallSetVector<PHIPair, 8> Matched; SmallPtrSet<PHINode *, 8> WillNotMatch; + SmallSetVector<PHINode *, 32> &PhiNodesToMatch = ST.newPhiNodes(); while (PhiNodesToMatch.size()) { PHINode *PHI = *PhiNodesToMatch.begin(); @@ -2938,12 +3012,8 @@ private: } if (IsMatched) { // Replace all matched values and erase them. - for (auto MV : Matched) { - MV.first->replaceAllUsesWith(MV.second); - PhiNodesToMatch.erase(MV.first); - ST.Put(MV.first, MV.second); - MV.first->eraseFromParent(); - } + for (auto MV : Matched) + ST.ReplacePhi(MV.first, MV.second); Matched.clear(); continue; } @@ -2953,11 +3023,11 @@ private: // Just remove all seen values in matcher. They will not match anything. 
PhiNotMatchedCount += WillNotMatch.size(); for (auto *P : WillNotMatch) - PhiNodesToMatch.erase(P); + PhiNodesToMatch.remove(P); } return true; } - /// \brief Fill the placeholder with values from predecessors and simplify it. + /// Fill the placeholder with values from predecessors and simplify it. void FillPlaceholders(FoldAddrToValueMapping &Map, SmallVectorImpl<ValueInBB> &TraverseOrder, SimplificationTracker &ST) { @@ -3011,8 +3081,7 @@ private: /// Also reports and order in what basic blocks have been traversed. void InsertPlaceholders(FoldAddrToValueMapping &Map, SmallVectorImpl<ValueInBB> &TraverseOrder, - SmallPtrSetImpl<PHINode *> &NewPhiNodes, - SmallPtrSetImpl<SelectInst *> &NewSelectNodes) { + SimplificationTracker &ST) { SmallVector<ValueInBB, 32> Worklist; assert((isa<PHINode>(Original.first) || isa<SelectInst>(Original.first)) && "Address must be a Phi or Select node"); @@ -3038,8 +3107,7 @@ private: Instruction *CurrentI = cast<Instruction>(CurrentValue); bool IsDefinedInThisBB = CurrentI->getParent() == CurrentBlock; - unsigned PredCount = - std::distance(pred_begin(CurrentBlock), pred_end(CurrentBlock)); + unsigned PredCount = pred_size(CurrentBlock); // if Current Value is not defined in this basic block we are interested // in values in predecessors. if (!IsDefinedInThisBB) { @@ -3047,7 +3115,7 @@ private: PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi", &CurrentBlock->front()); Map[Current] = PHI; - NewPhiNodes.insert(PHI); + ST.insertNewPhi(PHI); // Add all predecessors in work list. for (auto B : predecessors(CurrentBlock)) Worklist.push_back({ CurrentValue, B }); @@ -3061,7 +3129,7 @@ private: SelectInst::Create(OrigSelect->getCondition(), Dummy, Dummy, OrigSelect->getName(), OrigSelect, OrigSelect); Map[Current] = Select; - NewSelectNodes.insert(Select); + ST.insertNewSelect(Select); // We are interested in True and False value in this basic block. Worklist.push_back({ OrigSelect->getTrueValue(), CurrentBlock }); Worklist.push_back({ OrigSelect->getFalseValue(), CurrentBlock }); @@ -3073,7 +3141,7 @@ private: PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi", &CurrentBlock->front()); Map[Current] = PHI; - NewPhiNodes.insert(PHI); + ST.insertNewPhi(PHI); // Add all predecessors in work list. for (auto B : predecessors(CurrentBlock)) @@ -3167,7 +3235,7 @@ static bool MightBeFoldableInst(Instruction *I) { // Don't touch identity bitcasts. if (I->getType() == I->getOperand(0)->getType()) return false; - return I->getType()->isPointerTy() || I->getType()->isIntegerTy(); + return I->getType()->isIntOrPtrTy(); case Instruction::PtrToInt: // PtrToInt is always a noop, as we know that the int type is pointer sized. return true; @@ -3187,7 +3255,7 @@ static bool MightBeFoldableInst(Instruction *I) { } } -/// \brief Check whether or not \p Val is a legal instruction for \p TLI. +/// Check whether or not \p Val is a legal instruction for \p TLI. /// \note \p Val is assumed to be the product of some type promotion. /// Therefore if \p Val has an undefined state in \p TLI, this is assumed /// to be legal, as the non-promoted value would have had the same state. @@ -3207,9 +3275,9 @@ static bool isPromotedInstructionLegal(const TargetLowering &TLI, namespace { -/// \brief Hepler class to perform type promotion. +/// Hepler class to perform type promotion. 
class TypePromotionHelper { - /// \brief Utility function to check whether or not a sign or zero extension + /// Utility function to check whether or not a sign or zero extension /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by /// either using the operands of \p Inst or promoting \p Inst. /// The type of the extension is defined by \p IsSExt. @@ -3223,13 +3291,13 @@ class TypePromotionHelper { static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType, const InstrToOrigTy &PromotedInsts, bool IsSExt); - /// \brief Utility function to determine if \p OpIdx should be promoted when + /// Utility function to determine if \p OpIdx should be promoted when /// promoting \p Inst. static bool shouldExtOperand(const Instruction *Inst, int OpIdx) { return !(isa<SelectInst>(Inst) && OpIdx == 0); } - /// \brief Utility function to promote the operand of \p Ext when this + /// Utility function to promote the operand of \p Ext when this /// operand is a promotable trunc or sext or zext. /// \p PromotedInsts maps the instructions to their type before promotion. /// \p CreatedInstsCost[out] contains the cost of all instructions @@ -3244,7 +3312,7 @@ class TypePromotionHelper { SmallVectorImpl<Instruction *> *Exts, SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI); - /// \brief Utility function to promote the operand of \p Ext when this + /// Utility function to promote the operand of \p Ext when this /// operand is promotable and is not a supported trunc or sext. /// \p PromotedInsts maps the instructions to their type before promotion. /// \p CreatedInstsCost[out] contains the cost of all the instructions @@ -3290,7 +3358,7 @@ public: SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI); - /// \brief Given a sign/zero extend instruction \p Ext, return the approriate + /// Given a sign/zero extend instruction \p Ext, return the appropriate /// action to promote the operand of \p Ext instead of using Ext. /// \return NULL if no promotable action is possible with the current /// sign extension. @@ -3332,6 +3400,47 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, (IsSExt && BinOp->hasNoSignedWrap()))) return true; + // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst)) + if ((Inst->getOpcode() == Instruction::And || + Inst->getOpcode() == Instruction::Or)) + return true; + + // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst)) + if (Inst->getOpcode() == Instruction::Xor) { + const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)); + // Make sure it is not a NOT. + if (Cst && !Cst->getValue().isAllOnesValue()) + return true; + } + + // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst)) + // It may change a poisoned value into a regular value, like + // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12 + // poisoned value regular value + // It should be OK since undef covers valid value. + if (Inst->getOpcode() == Instruction::LShr && !IsSExt) + return true; + + // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst) + // It may change a poisoned value into a regular value, like + // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12 + // poisoned value regular value + // It should be OK since undef covers valid value. 
+ if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) { + const Instruction *ExtInst = + dyn_cast<const Instruction>(*Inst->user_begin()); + if (ExtInst->hasOneUse()) { + const Instruction *AndInst = + dyn_cast<const Instruction>(*ExtInst->user_begin()); + if (AndInst && AndInst->getOpcode() == Instruction::And) { + const ConstantInt *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1)); + if (Cst && + Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth())) + return true; + } + } + } + // Check if we can do the following simplification. // ext(trunc(opnd)) --> ext(opnd) if (!isa<TruncInst>(Inst)) @@ -3496,19 +3605,19 @@ Value *TypePromotionHelper::promoteOperandForOther( // Step #3. Instruction *ExtForOpnd = Ext; - DEBUG(dbgs() << "Propagate Ext to operands\n"); + LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n"); for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx; ++OpIdx) { - DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n'); + LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n'); if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() || !shouldExtOperand(ExtOpnd, OpIdx)) { - DEBUG(dbgs() << "No need to propagate\n"); + LLVM_DEBUG(dbgs() << "No need to propagate\n"); continue; } // Check if we can statically extend the operand. Value *Opnd = ExtOpnd->getOperand(OpIdx); if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) { - DEBUG(dbgs() << "Statically extend\n"); + LLVM_DEBUG(dbgs() << "Statically extend\n"); unsigned BitWidth = Ext->getType()->getIntegerBitWidth(); APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth) : Cst->getValue().zext(BitWidth); @@ -3517,16 +3626,16 @@ Value *TypePromotionHelper::promoteOperandForOther( } // UndefValue are typed, so we have to statically sign extend them. if (isa<UndefValue>(Opnd)) { - DEBUG(dbgs() << "Statically extend\n"); + LLVM_DEBUG(dbgs() << "Statically extend\n"); TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType())); continue; } - // Otherwise we have to explicity sign extend the operand. + // Otherwise we have to explicitly sign extend the operand. // Check if Ext was reused to extend an operand. if (!ExtForOpnd) { // If yes, create a new one. - DEBUG(dbgs() << "More operands to ext\n"); + LLVM_DEBUG(dbgs() << "More operands to ext\n"); Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType()) : TPT.createZExt(Ext, Opnd, Ext->getType()); if (!isa<Instruction>(ValForExtOpnd)) { @@ -3547,7 +3656,7 @@ Value *TypePromotionHelper::promoteOperandForOther( ExtForOpnd = nullptr; } if (ExtForOpnd == Ext) { - DEBUG(dbgs() << "Extension is useless now\n"); + LLVM_DEBUG(dbgs() << "Extension is useless now\n"); TPT.eraseInstruction(Ext); } return ExtOpnd; @@ -3563,7 +3672,8 @@ Value *TypePromotionHelper::promoteOperandForOther( /// \return True if the promotion is profitable, false otherwise. bool AddressingModeMatcher::isPromotionProfitable( unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const { - DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n'); + LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost + << '\n'); // The cost of the new extensions is greater than the cost of the // old extension plus what we folded. // This is not profitable. 
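The new shl-through-zext case added above is only accepted when the and-mask fits in the narrow type's bit width (the isIntN check on the And constant). The following standalone snippet is an illustrative sketch in plain C++, not part of the patch; it checks the bit-level identity that guard relies on for an i8 value widened to i32:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned V = 0; V < 256; ++V) {
    uint8_t X = static_cast<uint8_t>(V);
    // Original form: shl on the narrow type, zero-extend, then apply a mask
    // that fits in 8 bits (0xF8 here, mirroring the isIntN(8) requirement).
    uint32_t Narrow =
        static_cast<uint32_t>(static_cast<uint8_t>(X << 3)) & 0xF8u;
    // Promoted form: zero-extend first, shl on the wide type, same mask.
    uint32_t Wide = (static_cast<uint32_t>(X) << 3) & 0xF8u;
    // Bits at or above position 8 differ between the two shifts, but the
    // mask discards them, so the results agree for every input value.
    assert(Narrow == Wide);
  }
  return 0;
}

If the mask had bits at or above position 8 the two forms could diverge, which is exactly why the code rejects wider constants before allowing the extension to be hoisted through the shift.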
@@ -3613,8 +3723,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, case Instruction::BitCast: // BitCast is always a noop, and we can handle it as long as it is // int->int or pointer->pointer (we don't want int<->fp or something). - if ((AddrInst->getOperand(0)->getType()->isPointerTy() || - AddrInst->getOperand(0)->getType()->isIntegerTy()) && + if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() && // Don't touch identity bitcasts. These were probably put here by LSR, // and we don't want to mess around with them. Assume it knows what it // is doing. @@ -3714,6 +3823,30 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, // Check to see if we can fold the base pointer in too. if (matchAddr(AddrInst->getOperand(0), Depth+1)) return true; + } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) && + TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 && + ConstantOffset > 0) { + // Record GEPs with non-zero offsets as candidates for splitting in the + // event that the offset cannot fit into the r+i addressing mode. + // Simple and common case that only one GEP is used in calculating the + // address for the memory access. + Value *Base = AddrInst->getOperand(0); + auto *BaseI = dyn_cast<Instruction>(Base); + auto *GEP = cast<GetElementPtrInst>(AddrInst); + if (isa<Argument>(Base) || isa<GlobalValue>(Base) || + (BaseI && !isa<CastInst>(BaseI) && + !isa<GetElementPtrInst>(BaseI))) { + // If the base is an instruction, make sure the GEP is not in the same + // basic block as the base. If the base is an argument or global + // value, make sure the GEP is not in the entry block. Otherwise, + // instruction selection can undo the split. Also make sure the + // parent block allows inserting non-PHI instructions before the + // terminator. + BasicBlock *Parent = + BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock(); + if (GEP->getParent() != Parent && !Parent->getTerminator()->isEHPad()) + LargeOffsetGEP = std::make_pair(GEP, ConstantOffset); + } } AddrMode.BaseOffs -= ConstantOffset; return false; @@ -3810,7 +3943,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, PromotedOperand)) { AddrMode = BackupAddrMode; AddrModeInsts.resize(OldSize); - DEBUG(dbgs() << "Sign extension does not pay off: rollback\n"); + LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n"); TPT.rollback(LastKnownGood); return false; } @@ -4124,12 +4257,13 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // will tell us if the addressing mode for the memory operation will // *actually* cover the shared instruction. ExtAddrMode Result; + std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr, + 0); TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); - AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, - AddressAccessTy, AS, - MemoryInst, Result, InsertedInsts, - PromotedInsts, TPT); + AddressingModeMatcher Matcher( + MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result, + InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP); Matcher.IgnoreProfitability = true; bool Success = Matcher.matchAddr(Address, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); @@ -4231,11 +4365,24 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // the result may differ depending on what other uses our candidate // addressing instructions might have. 
AddrModeInsts.clear(); + std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr, + 0); ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI, - InsertedInsts, PromotedInsts, TPT); - NewAddrMode.OriginalValue = V; + InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP); + + GetElementPtrInst *GEP = LargeOffsetGEP.first; + if (GEP && GEP->getParent() != MemoryInst->getParent() && + !NewGEPBases.count(GEP)) { + // If splitting the underlying data structure can reduce the offset of a + // GEP, collect the GEP. Skip the GEPs that are the new bases of + // previously split data structures. + LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP); + if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end()) + LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size(); + } + NewAddrMode.OriginalValue = V; if (!AddrModes.addNewAddrMode(NewAddrMode)) break; } @@ -4259,7 +4406,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) { return IsNonLocalValue(V, MemoryInst->getParent()); })) { - DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); + LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode + << "\n"); return false; } @@ -4278,17 +4426,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; if (SunkAddr) { - DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " - << *MemoryInst << "\n"); + LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode + << " for " << *MemoryInst << "\n"); if (SunkAddr->getType() != Addr->getType()) SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); } else if (AddrSinkUsingGEPs || - (!AddrSinkUsingGEPs.getNumOccurrences() && TM && - SubtargetInfo->useAA())) { + (!AddrSinkUsingGEPs.getNumOccurrences() && TM && TTI->useAA())) { // By default, we use the GEP-based method when AA is used later. This // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. - DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " - << *MemoryInst << "\n"); + LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode + << " for " << *MemoryInst << "\n"); Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); Value *ResultPtr = nullptr, *ResultIndex = nullptr; @@ -4427,8 +4574,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, DL->isNonIntegralPointerType(AddrMode.BaseGV->getType()))) return false; - DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " - << *MemoryInst << "\n"); + LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode + << " for " << *MemoryInst << "\n"); Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); Value *Result = nullptr; @@ -4554,7 +4701,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { return MadeChange; } -/// \brief Check if all the uses of \p Val are equivalent (or free) zero or +/// Check if all the uses of \p Val are equivalent (or free) zero or /// sign extensions. 
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) { assert(!Val->use_empty() && "Input must have at least one use"); @@ -4602,7 +4749,7 @@ static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) { return true; } -/// \brief Try to speculatively promote extensions in \p Exts and continue +/// Try to speculatively promote extensions in \p Exts and continue /// promoting through newly promoted operands recursively as far as doing so is /// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts. /// When some promotion happened, \p TPT contains the proper state to revert @@ -4728,7 +4875,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) { } if (!DT.dominates(Pt, Inst)) // Give up if we need to merge in a common dominator as the - // expermients show it is not profitable. + // experiments show it is not profitable. continue; Inst->replaceAllUsesWith(Pt); RemovedInsts.insert(Inst); @@ -4744,6 +4891,154 @@ bool CodeGenPrepare::mergeSExts(Function &F) { return Changed; } +// Spliting large data structures so that the GEPs accessing them can have +// smaller offsets so that they can be sunk to the same blocks as their users. +// For example, a large struct starting from %base is splitted into two parts +// where the second part starts from %new_base. +// +// Before: +// BB0: +// %base = +// +// BB1: +// %gep0 = gep %base, off0 +// %gep1 = gep %base, off1 +// %gep2 = gep %base, off2 +// +// BB2: +// %load1 = load %gep0 +// %load2 = load %gep1 +// %load3 = load %gep2 +// +// After: +// BB0: +// %base = +// %new_base = gep %base, off0 +// +// BB1: +// %new_gep0 = %new_base +// %new_gep1 = gep %new_base, off1 - off0 +// %new_gep2 = gep %new_base, off2 - off0 +// +// BB2: +// %load1 = load i32, i32* %new_gep0 +// %load2 = load i32, i32* %new_gep1 +// %load3 = load i32, i32* %new_gep2 +// +// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because +// their offsets are smaller enough to fit into the addressing mode. +bool CodeGenPrepare::splitLargeGEPOffsets() { + bool Changed = false; + for (auto &Entry : LargeOffsetGEPMap) { + Value *OldBase = Entry.first; + SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>> + &LargeOffsetGEPs = Entry.second; + auto compareGEPOffset = + [&](const std::pair<GetElementPtrInst *, int64_t> &LHS, + const std::pair<GetElementPtrInst *, int64_t> &RHS) { + if (LHS.first == RHS.first) + return false; + if (LHS.second != RHS.second) + return LHS.second < RHS.second; + return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first]; + }; + // Sorting all the GEPs of the same data structures based on the offsets. + llvm::sort(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end(), + compareGEPOffset); + LargeOffsetGEPs.erase( + std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()), + LargeOffsetGEPs.end()); + // Skip if all the GEPs have the same offsets. + if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second) + continue; + GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first; + int64_t BaseOffset = LargeOffsetGEPs.begin()->second; + Value *NewBaseGEP = nullptr; + + auto LargeOffsetGEP = LargeOffsetGEPs.begin(); + while (LargeOffsetGEP != LargeOffsetGEPs.end()) { + GetElementPtrInst *GEP = LargeOffsetGEP->first; + int64_t Offset = LargeOffsetGEP->second; + if (Offset != BaseOffset) { + TargetLowering::AddrMode AddrMode; + AddrMode.BaseOffs = Offset - BaseOffset; + // The result type of the GEP might not be the type of the memory + // access. 
+ if (!TLI->isLegalAddressingMode(*DL, AddrMode, + GEP->getResultElementType(), + GEP->getAddressSpace())) { + // We need to create a new base if the offset to the current base is + // too large to fit into the addressing mode. So, a very large struct + // may be splitted into several parts. + BaseGEP = GEP; + BaseOffset = Offset; + NewBaseGEP = nullptr; + } + } + + // Generate a new GEP to replace the current one. + IRBuilder<> Builder(GEP); + Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); + Type *I8PtrTy = + Builder.getInt8PtrTy(GEP->getType()->getPointerAddressSpace()); + Type *I8Ty = Builder.getInt8Ty(); + + if (!NewBaseGEP) { + // Create a new base if we don't have one yet. Find the insertion + // pointer for the new base first. + BasicBlock::iterator NewBaseInsertPt; + BasicBlock *NewBaseInsertBB; + if (auto *BaseI = dyn_cast<Instruction>(OldBase)) { + // If the base of the struct is an instruction, the new base will be + // inserted close to it. + NewBaseInsertBB = BaseI->getParent(); + if (isa<PHINode>(BaseI)) + NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); + else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) { + NewBaseInsertBB = + SplitEdge(NewBaseInsertBB, Invoke->getNormalDest()); + NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); + } else + NewBaseInsertPt = std::next(BaseI->getIterator()); + } else { + // If the current base is an argument or global value, the new base + // will be inserted to the entry block. + NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock(); + NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); + } + IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt); + // Create a new base. + Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset); + NewBaseGEP = OldBase; + if (NewBaseGEP->getType() != I8PtrTy) + NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy); + NewBaseGEP = + NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep"); + NewGEPBases.insert(NewBaseGEP); + } + + Value *NewGEP = NewBaseGEP; + if (Offset == BaseOffset) { + if (GEP->getType() != I8PtrTy) + NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType()); + } else { + // Calculate the new offset for the new GEP. + Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset); + NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index); + + if (GEP->getType() != I8PtrTy) + NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType()); + } + GEP->replaceAllUsesWith(NewGEP); + LargeOffsetGEPID.erase(GEP); + LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP); + GEP->eraseFromParent(); + Changed = true; + } + } + return Changed; +} + /// Return true, if an ext(load) can be formed from an extension in /// \p MovedExts. bool CodeGenPrepare::canFormExtLd( @@ -5053,8 +5348,7 @@ bool CodeGenPrepare::optimizeExtUses(Instruction *I) { // x = phi x1', x2' // y = and x, 0xff bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { - if (!Load->isSimple() || - !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy())) + if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy()) return false; // Skip loads we've already transformed. @@ -5519,7 +5813,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { namespace { -/// \brief Helper class to promote a scalar operation to a vector one. +/// Helper class to promote a scalar operation to a vector one. /// This class is used to move downward extractelement transition. 
/// E.g., /// a = vector_op <2 x i32> @@ -5556,7 +5850,7 @@ class VectorPromoteHelper { /// Instruction that will be combined with the transition. Instruction *CombineInst = nullptr; - /// \brief The instruction that represents the current end of the transition. + /// The instruction that represents the current end of the transition. /// Since we are faking the promotion until we reach the end of the chain /// of computation, we need a way to get the current end of the transition. Instruction *getEndOfTransition() const { @@ -5565,7 +5859,7 @@ class VectorPromoteHelper { return InstsToBePromoted.back(); } - /// \brief Return the index of the original value in the transition. + /// Return the index of the original value in the transition. /// E.g., for "extractelement <2 x i32> c, i32 1" the original value, /// c, is at index 0. unsigned getTransitionOriginalValueIdx() const { @@ -5574,7 +5868,7 @@ class VectorPromoteHelper { return 0; } - /// \brief Return the index of the index in the transition. + /// Return the index of the index in the transition. /// E.g., for "extractelement <2 x i32> c, i32 0" the index /// is at index 1. unsigned getTransitionIdx() const { @@ -5583,7 +5877,7 @@ class VectorPromoteHelper { return 1; } - /// \brief Get the type of the transition. + /// Get the type of the transition. /// This is the type of the original value. /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the /// transition is <2 x i32>. @@ -5591,7 +5885,7 @@ class VectorPromoteHelper { return Transition->getOperand(getTransitionOriginalValueIdx())->getType(); } - /// \brief Promote \p ToBePromoted by moving \p Def downward through. + /// Promote \p ToBePromoted by moving \p Def downward through. /// I.e., we have the following sequence: /// Def = Transition <ty1> a to <ty2> /// b = ToBePromoted <ty2> Def, ... @@ -5600,7 +5894,7 @@ class VectorPromoteHelper { /// Def = Transition <ty1> ToBePromoted to <ty2> void promoteImpl(Instruction *ToBePromoted); - /// \brief Check whether or not it is profitable to promote all the + /// Check whether or not it is profitable to promote all the /// instructions enqueued to be promoted. bool isProfitableToPromote() { Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx()); @@ -5646,12 +5940,13 @@ class VectorPromoteHelper { VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType, Arg0OVK, Arg1OVK); } - DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: " - << ScalarCost << "\nVector: " << VectorCost << '\n'); + LLVM_DEBUG( + dbgs() << "Estimated cost of computation to be promoted:\nScalar: " + << ScalarCost << "\nVector: " << VectorCost << '\n'); return ScalarCost > VectorCost; } - /// \brief Generate a constant vector with \p Val with the same + /// Generate a constant vector with \p Val with the same /// number of elements as the transition. /// \p UseSplat defines whether or not \p Val should be replicated /// across the whole vector. @@ -5686,7 +5981,7 @@ class VectorPromoteHelper { return ConstantVector::get(ConstVec); } - /// \brief Check if promoting to a vector type an operand at \p OperandIdx + /// Check if promoting to a vector type an operand at \p OperandIdx /// in \p Use can trigger undefined behavior. static bool canCauseUndefinedBehavior(const Instruction *Use, unsigned OperandIdx) { @@ -5718,13 +6013,13 @@ public: assert(Transition && "Do not know how to promote null"); } - /// \brief Check if we can promote \p ToBePromoted to \p Type. 
+ /// Check if we can promote \p ToBePromoted to \p Type. bool canPromote(const Instruction *ToBePromoted) const { // We could support CastInst too. return isa<BinaryOperator>(ToBePromoted); } - /// \brief Check if it is profitable to promote \p ToBePromoted + /// Check if it is profitable to promote \p ToBePromoted /// by moving downward the transition through. bool shouldPromote(const Instruction *ToBePromoted) const { // Promote only if all the operands can be statically expanded. @@ -5752,23 +6047,23 @@ public: ISDOpcode, TLI.getValueType(DL, getTransitionType(), true)); } - /// \brief Check whether or not \p Use can be combined + /// Check whether or not \p Use can be combined /// with the transition. /// I.e., is it possible to do Use(Transition) => AnotherUse? bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); } - /// \brief Record \p ToBePromoted as part of the chain to be promoted. + /// Record \p ToBePromoted as part of the chain to be promoted. void enqueueForPromotion(Instruction *ToBePromoted) { InstsToBePromoted.push_back(ToBePromoted); } - /// \brief Set the instruction that will be combined with the transition. + /// Set the instruction that will be combined with the transition. void recordCombineInstruction(Instruction *ToBeCombined) { assert(canCombine(ToBeCombined) && "Unsupported instruction to combine"); CombineInst = ToBeCombined; } - /// \brief Promote all the instructions enqueued for promotion if it is + /// Promote all the instructions enqueued for promotion if it is /// is profitable. /// \return True if the promotion happened, false otherwise. bool promote() { @@ -5852,35 +6147,36 @@ bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { // => we would need to check that we are moving it at a cheaper place and // we do not do that for now. BasicBlock *Parent = Inst->getParent(); - DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n'); + LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n'); VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost); // If the transition has more than one use, assume this is not going to be // beneficial. while (Inst->hasOneUse()) { Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin()); - DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n'); + LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n'); if (ToBePromoted->getParent() != Parent) { - DEBUG(dbgs() << "Instruction to promote is in a different block (" - << ToBePromoted->getParent()->getName() - << ") than the transition (" << Parent->getName() << ").\n"); + LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block (" + << ToBePromoted->getParent()->getName() + << ") than the transition (" << Parent->getName() + << ").\n"); return false; } if (VPH.canCombine(ToBePromoted)) { - DEBUG(dbgs() << "Assume " << *Inst << '\n' - << "will be combined with: " << *ToBePromoted << '\n'); + LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n' + << "will be combined with: " << *ToBePromoted << '\n'); VPH.recordCombineInstruction(ToBePromoted); bool Changed = VPH.promote(); NumStoreExtractExposed += Changed; return Changed; } - DEBUG(dbgs() << "Try promoting.\n"); + LLVM_DEBUG(dbgs() << "Try promoting.\n"); if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted)) return false; - DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n"); + LLVM_DEBUG(dbgs() << "Promoting is possible... 
Enqueue for promotion!\n"); VPH.enqueueForPromotion(ToBePromoted); Inst = ToBePromoted; @@ -5890,7 +6186,7 @@ bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { /// For the instruction sequence of store below, F and I values /// are bundled together as an i64 value before being stored into memory. -/// Sometimes it is more efficent to generate separate stores for F and I, +/// Sometimes it is more efficient to generate separate stores for F and I, /// which can remove the bitwise instructions or sink them to colder places. /// /// (store (or (zext (bitcast F to i32) to i64), @@ -5978,12 +6274,13 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, if (HBC && HBC->getParent() != SI.getParent()) HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType()); + bool IsLE = SI.getModule()->getDataLayout().isLittleEndian(); auto CreateSplitStore = [&](Value *V, bool Upper) { V = Builder.CreateZExtOrBitCast(V, SplitStoreType); Value *Addr = Builder.CreateBitCast( SI.getOperand(1), SplitStoreType->getPointerTo(SI.getPointerAddressSpace())); - if (Upper) + if ((IsLE && Upper) || (!IsLE && !Upper)) Addr = Builder.CreateGEP( SplitStoreType, Addr, ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1)); @@ -6270,6 +6567,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { /// The GEP operand must be a pointer, so must its result -> BitCast Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), GEPI->getName(), GEPI); + NC->setDebugLoc(GEPI->getDebugLoc()); GEPI->replaceAllUsesWith(NC); GEPI->eraseFromParent(); ++NumGEPsElim; @@ -6374,7 +6672,8 @@ bool CodeGenPrepare::placeDbgValues(Function &F) { // after it. if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad()) continue; - DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI); + LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n" + << *DVI << ' ' << *VI); DVI->removeFromParent(); if (isa<PHINode>(VI)) DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt()); @@ -6388,7 +6687,7 @@ bool CodeGenPrepare::placeDbgValues(Function &F) { return MadeChange; } -/// \brief Scale down both weights to fit into uint32_t. +/// Scale down both weights to fit into uint32_t. static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1; @@ -6396,7 +6695,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { NewFalse = NewFalse / Scale; } -/// \brief Some targets prefer to split a conditional branch like: +/// Some targets prefer to split a conditional branch like: /// \code /// %0 = icmp ne i32 %a, 0 /// %1 = icmp ne i32 %b, 0 @@ -6453,7 +6752,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) ) continue; - DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump()); + LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump()); // Create a new BB. auto TmpBB = @@ -6465,8 +6764,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { Br1->setCondition(Cond1); LogicOp->eraseFromParent(); - // Depending on the conditon we have to either replace the true or the false - // successor of the original branch instruction. + // Depending on the condition we have to either replace the true or the + // false successor of the original branch instruction. 
if (Opc == Instruction::And) Br1->setSuccessor(0, TmpBB); else @@ -6519,8 +6818,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { // We have flexibility in setting Prob for BB1 and Prob for NewBB. // The requirement is that // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) - // = TrueProb for orignal BB. - // Assuming the orignal weights are A and B, one choice is to set BB1's + // = TrueProb for original BB. + // Assuming the original weights are A and B, one choice is to set BB1's // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice // assumes that // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. @@ -6554,8 +6853,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { // We have flexibility in setting Prob for BB1 and Prob for TmpBB. // The requirement is that // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) - // = FalseProb for orignal BB. - // Assuming the orignal weights are A and B, one choice is to set BB1's + // = FalseProb for original BB. + // Assuming the original weights are A and B, one choice is to set BB1's // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice // assumes that // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. @@ -6581,8 +6880,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { MadeChange = true; - DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump(); - TmpBB->dump()); + LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump(); + TmpBB->dump()); } return MadeChange; } diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 98e22b24d37a..840e5ede6444 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -113,7 +113,7 @@ void CriticalAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count, // FIXME: It may be possible to remove the isKill() restriction once PR18663 // has been properly fixed. There can be value in processing kills as seen in // the AggressiveAntiDepBreaker class. - if (MI.isDebugValue() || MI.isKill()) + if (MI.isDebugInstr() || MI.isKill()) return; assert(Count < InsertPosIndex && "Instruction index out of expected range!"); @@ -170,11 +170,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) { // FIXME: The issue with predicated instruction is more complex. We are being // conservative here because the kill markers cannot be trusted after // if-conversion: - // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] + // %r6 = LDR %sp, %reg0, 92, 14, %reg0; mem:LD4[FixedStack14] // ... - // STR %r0, killed %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395] - // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12] - // STR %r0, killed %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) + // STR %r0, killed %r6, %reg0, 0, 0, %cpsr; mem:ST4[%395] + // %r6 = LDR %sp, %reg0, 100, 0, %cpsr; mem:LD4[FixedStack12] + // STR %r0, killed %r6, %reg0, 0, 14, %reg0; mem:ST4[%396](align=8) // // The first R6 kill is not really a kill since it's killed by a predicated // instruction which may not be executed. 
The second R6 def may or may not @@ -461,14 +461,14 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits, #ifndef NDEBUG { - DEBUG(dbgs() << "Critical path has total latency " - << (Max->getDepth() + Max->Latency) << "\n"); - DEBUG(dbgs() << "Available regs:"); + LLVM_DEBUG(dbgs() << "Critical path has total latency " + << (Max->getDepth() + Max->Latency) << "\n"); + LLVM_DEBUG(dbgs() << "Available regs:"); for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { if (KillIndices[Reg] == ~0u) - DEBUG(dbgs() << " " << printReg(Reg, TRI)); + LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI)); } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); } #endif @@ -534,7 +534,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits, // FIXME: It may be possible to remove the isKill() restriction once PR18663 // has been properly fixed. There can be value in processing kills as seen // in the AggressiveAntiDepBreaker class. - if (MI.isDebugValue() || MI.isKill()) + if (MI.isDebugInstr() || MI.isKill()) continue; // Check if this instruction has a dependence on the critical path that @@ -645,10 +645,10 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits, AntiDepReg, LastNewReg[AntiDepReg], RC, ForbidRegs)) { - DEBUG(dbgs() << "Breaking anti-dependence edge on " - << printReg(AntiDepReg, TRI) << " with " - << RegRefs.count(AntiDepReg) << " references" - << " using " << printReg(NewReg, TRI) << "!\n"); + LLVM_DEBUG(dbgs() << "Breaking anti-dependence edge on " + << printReg(AntiDepReg, TRI) << " with " + << RegRefs.count(AntiDepReg) << " references" + << " using " << printReg(NewReg, TRI) << "!\n"); // Update the references to the old register to refer to the new // register. diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp index 848db444270d..cd302e78cc3e 100644 --- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -222,7 +222,7 @@ VLIWPacketizerList::~VLIWPacketizerList() { // End the current packet, bundle packet instructions and reset DFA state. void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, MachineBasicBlock::iterator MI) { - DEBUG({ + LLVM_DEBUG({ if (!CurrentPacketMIs.empty()) { dbgs() << "Finalizing packet:\n"; for (MachineInstr *MI : CurrentPacketMIs) @@ -235,7 +235,7 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, } CurrentPacketMIs.clear(); ResourceTracker->clearResources(); - DEBUG(dbgs() << "End packet\n"); + LLVM_DEBUG(dbgs() << "End packet\n"); } // Bundle machine instructions into packets. @@ -248,7 +248,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, std::distance(BeginItr, EndItr)); VLIWScheduler->schedule(); - DEBUG({ + LLVM_DEBUG({ dbgs() << "Scheduling DAG of the packetize region\n"; for (SUnit &SU : VLIWScheduler->SUnits) SU.dumpAll(VLIWScheduler); @@ -287,10 +287,10 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, assert(SUI && "Missing SUnit Info!"); // Ask DFA if machine resource is available for MI. 
- DEBUG(dbgs() << "Checking resources for adding MI to packet " << MI); + LLVM_DEBUG(dbgs() << "Checking resources for adding MI to packet " << MI); bool ResourceAvail = ResourceTracker->canReserveResources(MI); - DEBUG({ + LLVM_DEBUG({ if (ResourceAvail) dbgs() << " Resources are available for adding MI to packet\n"; else @@ -302,31 +302,33 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, SUnit *SUJ = MIToSUnit[MJ]; assert(SUJ && "Missing SUnit Info!"); - DEBUG(dbgs() << " Checking against MJ " << *MJ); + LLVM_DEBUG(dbgs() << " Checking against MJ " << *MJ); // Is it legal to packetize SUI and SUJ together. if (!isLegalToPacketizeTogether(SUI, SUJ)) { - DEBUG(dbgs() << " Not legal to add MI, try to prune\n"); + LLVM_DEBUG(dbgs() << " Not legal to add MI, try to prune\n"); // Allow packetization if dependency can be pruned. if (!isLegalToPruneDependencies(SUI, SUJ)) { // End the packet if dependency cannot be pruned. - DEBUG(dbgs() << " Could not prune dependencies for adding MI\n"); + LLVM_DEBUG(dbgs() + << " Could not prune dependencies for adding MI\n"); endPacket(MBB, MI); break; } - DEBUG(dbgs() << " Pruned dependence for adding MI\n"); + LLVM_DEBUG(dbgs() << " Pruned dependence for adding MI\n"); } } } else { - DEBUG(if (ResourceAvail) - dbgs() << "Resources are available, but instruction should not be " - "added to packet\n " << MI); + LLVM_DEBUG(if (ResourceAvail) dbgs() + << "Resources are available, but instruction should not be " + "added to packet\n " + << MI); // End the packet if resource is not available, or if the instruction // shoud not be added to the current packet. endPacket(MBB, MI); } // Add MI to the current packet. - DEBUG(dbgs() << "* Adding MI to packet " << MI << '\n'); + LLVM_DEBUG(dbgs() << "* Adding MI to packet " << MI << '\n'); BeginItr = addToPacket(MI); } // For all instructions in the packetization range. diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index e6a54bb300f2..ff44c5660bad 100644 --- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -125,7 +125,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // If the instruction is dead, delete it! if (isDead(MI)) { - DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); + LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); // It is possible that some DBG_VALUE instructions refer to this // instruction. They get marked as undef and will be deleted // in the live debug variable analysis. 
diff --git a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp index 7d7eb57352a2..c83db476a4de 100644 --- a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp +++ b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp @@ -439,7 +439,7 @@ LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) { const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg); CrossCopy = isCrossCopy(*MRI, UseMI, DstRC, MO); if (CrossCopy) - DEBUG(dbgs() << "Copy across incompatible classes: " << UseMI); + LLVM_DEBUG(dbgs() << "Copy across incompatible classes: " << UseMI); } if (!CrossCopy) @@ -520,17 +520,15 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { transferDefinedLanesStep(MO, Info.DefinedLanes); } - DEBUG( - dbgs() << "Defined/Used lanes:\n"; - for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); - const VRegInfo &Info = VRegInfos[RegIdx]; - dbgs() << printReg(Reg, nullptr) - << " Used: " << PrintLaneMask(Info.UsedLanes) - << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n'; - } - dbgs() << "\n"; - ); + LLVM_DEBUG(dbgs() << "Defined/Used lanes:\n"; for (unsigned RegIdx = 0; + RegIdx < NumVirtRegs; + ++RegIdx) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + const VRegInfo &Info = VRegInfos[RegIdx]; + dbgs() << printReg(Reg, nullptr) + << " Used: " << PrintLaneMask(Info.UsedLanes) + << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n'; + } dbgs() << "\n";); bool Again = false; // Mark operands as dead/unused. @@ -545,18 +543,19 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg); const VRegInfo &RegInfo = VRegInfos[RegIdx]; if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes.none()) { - DEBUG(dbgs() << "Marking operand '" << MO << "' as dead in " << MI); + LLVM_DEBUG(dbgs() + << "Marking operand '" << MO << "' as dead in " << MI); MO.setIsDead(); } if (MO.readsReg()) { bool CrossCopy = false; if (isUndefRegAtInput(MO, RegInfo)) { - DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in " - << MI); + LLVM_DEBUG(dbgs() + << "Marking operand '" << MO << "' as undef in " << MI); MO.setIsUndef(); } else if (isUndefInput(MO, &CrossCopy)) { - DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in " - << MI); + LLVM_DEBUG(dbgs() + << "Marking operand '" << MO << "' as undef in " << MI); MO.setIsUndef(); if (CrossCopy) Again = true; @@ -577,7 +576,7 @@ bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) { // so we safe the compile time. 
MRI = &MF.getRegInfo(); if (!MRI->subRegLivenessEnabled()) { - DEBUG(dbgs() << "Skipping Detect dead lanes pass\n"); + LLVM_DEBUG(dbgs() << "Skipping Detect dead lanes pass\n"); return false; } diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp index 39d80c0bf9bd..4586649d17f0 100644 --- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -33,7 +34,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/Utils/Local.h" #include <cstddef> using namespace llvm; @@ -195,9 +195,9 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { if (Resumes.empty()) return false; - // Check the personality, don't do anything if it's funclet-based. + // Check the personality, don't do anything if it's scope-based. EHPersonality Pers = classifyEHPersonality(Fn.getPersonalityFn()); - if (isFuncletEHPersonality(Pers)) + if (isScopedEHPersonality(Pers)) return false; LLVMContext &Ctx = Fn.getContext(); diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp index 6294ff450113..098afd885f2f 100644 --- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -185,7 +185,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to // get right. if (!MBB->livein_empty()) { - DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n"); + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n"); return false; } @@ -195,18 +195,18 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { // terminators never have side effects or define any used register values. for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->getFirstTerminator(); I != E; ++I) { - if (I->isDebugValue()) + if (I->isDebugInstr()) continue; if (++InstrCount > BlockInstrLimit && !Stress) { - DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " - << BlockInstrLimit << " instructions.\n"); + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " + << BlockInstrLimit << " instructions.\n"); return false; } // There shouldn't normally be any phis in a single-predecessor block. if (I->isPHI()) { - DEBUG(dbgs() << "Can't hoist: " << *I); + LLVM_DEBUG(dbgs() << "Can't hoist: " << *I); return false; } @@ -214,21 +214,21 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { // speculate GOT or constant pool loads that are guaranteed not to trap, // but we don't support that for now. if (I->mayLoad()) { - DEBUG(dbgs() << "Won't speculate load: " << *I); + LLVM_DEBUG(dbgs() << "Won't speculate load: " << *I); return false; } // We never speculate stores, so an AA pointer isn't necessary. bool DontMoveAcrossStore = true; if (!I->isSafeToMove(nullptr, DontMoveAcrossStore)) { - DEBUG(dbgs() << "Can't speculate: " << *I); + LLVM_DEBUG(dbgs() << "Can't speculate: " << *I); return false; } // Check for any dependencies on Head instructions. 
for (const MachineOperand &MO : I->operands()) { if (MO.isRegMask()) { - DEBUG(dbgs() << "Won't speculate regmask: " << *I); + LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I); return false; } if (!MO.isReg()) @@ -246,9 +246,10 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { if (!DefMI || DefMI->getParent() != Head) continue; if (InsertAfter.insert(DefMI).second) - DEBUG(dbgs() << printMBBReference(*MBB) << " depends on " << *DefMI); + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " depends on " + << *DefMI); if (DefMI->isTerminator()) { - DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); + LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); return false; } } @@ -279,7 +280,7 @@ bool SSAIfConv::findInsertionPoint() { --I; // Some of the conditional code depends in I. if (InsertAfter.count(&*I)) { - DEBUG(dbgs() << "Can't insert code after " << *I); + LLVM_DEBUG(dbgs() << "Can't insert code after " << *I); return false; } @@ -313,7 +314,7 @@ bool SSAIfConv::findInsertionPoint() { // Some of the clobbered registers are live before I, not a valid insertion // point. if (!LiveRegUnits.empty()) { - DEBUG({ + LLVM_DEBUG({ dbgs() << "Would clobber"; for (SparseSet<unsigned>::const_iterator i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i) @@ -325,10 +326,10 @@ bool SSAIfConv::findInsertionPoint() { // This is a valid insertion point. InsertionPoint = I; - DEBUG(dbgs() << "Can insert before " << *I); + LLVM_DEBUG(dbgs() << "Can insert before " << *I); return true; } - DEBUG(dbgs() << "No legal insertion point found.\n"); + LLVM_DEBUG(dbgs() << "No legal insertion point found.\n"); return false; } @@ -361,39 +362,39 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 || Succ1->succ_begin()[0] != Tail) return false; - DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> " - << printMBBReference(*Succ0) << "/" - << printMBBReference(*Succ1) << " -> " - << printMBBReference(*Tail) << '\n'); + LLVM_DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> " + << printMBBReference(*Succ0) << "/" + << printMBBReference(*Succ1) << " -> " + << printMBBReference(*Tail) << '\n'); // Live-in physregs are tricky to get right when speculating code. if (!Tail->livein_empty()) { - DEBUG(dbgs() << "Tail has live-ins.\n"); + LLVM_DEBUG(dbgs() << "Tail has live-ins.\n"); return false; } } else { - DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> " - << printMBBReference(*Succ0) << " -> " - << printMBBReference(*Tail) << '\n'); + LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> " + << printMBBReference(*Succ0) << " -> " + << printMBBReference(*Tail) << '\n'); } // This is a triangle or a diamond. // If Tail doesn't have any phis, there must be side effects. if (Tail->empty() || !Tail->front().isPHI()) { - DEBUG(dbgs() << "No phis in tail.\n"); + LLVM_DEBUG(dbgs() << "No phis in tail.\n"); return false; } // The branch we're looking to eliminate must be analyzable. Cond.clear(); if (TII->analyzeBranch(*Head, TBB, FBB, Cond)) { - DEBUG(dbgs() << "Branch not analyzable.\n"); + LLVM_DEBUG(dbgs() << "Branch not analyzable.\n"); return false; } // This is weird, probably some sort of degenerate CFG. 
if (!TBB) { - DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n"); + LLVM_DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n"); return false; } @@ -422,7 +423,7 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { // Get target information. if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg, PI.CondCycles, PI.TCycles, PI.FCycles)) { - DEBUG(dbgs() << "Can't convert: " << *PI.PHI); + LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI); return false; } } @@ -459,10 +460,10 @@ void SSAIfConv::replacePHIInstrs() { // Convert all PHIs to select instructions inserted before FirstTerm. for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { PHIInfo &PI = PHIs[i]; - DEBUG(dbgs() << "If-converting " << *PI.PHI); + LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); unsigned DstReg = PI.PHI->getOperand(0).getReg(); TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); - DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); + LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); PI.PHI->eraseFromParent(); PI.PHI = nullptr; } @@ -481,7 +482,7 @@ void SSAIfConv::rewritePHIOperands() { PHIInfo &PI = PHIs[i]; unsigned DstReg = 0; - DEBUG(dbgs() << "If-converting " << *PI.PHI); + LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); if (PI.TReg == PI.FReg) { // We do not need the select instruction if both incoming values are // equal. @@ -491,7 +492,7 @@ void SSAIfConv::rewritePHIOperands() { DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); - DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); + LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); } // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. @@ -505,7 +506,7 @@ void SSAIfConv::rewritePHIOperands() { PI.PHI->RemoveOperand(i-2); } } - DEBUG(dbgs() << " --> " << *PI.PHI); + LLVM_DEBUG(dbgs() << " --> " << *PI.PHI); } } @@ -563,8 +564,8 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { assert(Head->succ_empty() && "Additional head successors?"); if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) { // Splice Tail onto the end of Head. - DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail) << " into head " - << printMBBReference(*Head) << '\n'); + LLVM_DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail) + << " into head " << printMBBReference(*Head) << '\n'); Head->splice(Head->end(), Tail, Tail->begin(), Tail->end()); Head->transferSuccessorsAndUpdatePHIs(Tail); @@ -572,12 +573,12 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { Tail->eraseFromParent(); } else { // We need a branch to Tail, let code placement work it out later. 
- DEBUG(dbgs() << "Converting to unconditional branch.\n"); + LLVM_DEBUG(dbgs() << "Converting to unconditional branch.\n"); SmallVector<MachineOperand, 0> EmptyCond; TII->insertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL); Head->addSuccessor(Tail); } - DEBUG(dbgs() << *Head); + LLVM_DEBUG(dbgs() << *Head); } @@ -692,7 +693,7 @@ bool EarlyIfConverter::shouldConvertIf() { MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred()); MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred()); - DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace); + LLVM_DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace); unsigned MinCrit = std::min(TBBTrace.getCriticalPath(), FBBTrace.getCriticalPath()); @@ -706,10 +707,10 @@ bool EarlyIfConverter::shouldConvertIf() { if (IfConv.TBB != IfConv.Tail) ExtraBlocks.push_back(IfConv.TBB); unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks); - DEBUG(dbgs() << "Resource length " << ResLength - << ", minimal critical path " << MinCrit << '\n'); + LLVM_DEBUG(dbgs() << "Resource length " << ResLength + << ", minimal critical path " << MinCrit << '\n'); if (ResLength > MinCrit + CritLimit) { - DEBUG(dbgs() << "Not enough available ILP.\n"); + LLVM_DEBUG(dbgs() << "Not enough available ILP.\n"); return false; } @@ -719,7 +720,7 @@ bool EarlyIfConverter::shouldConvertIf() { MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head); unsigned BranchDepth = HeadTrace.getInstrCycles(*IfConv.Head->getFirstTerminator()).Depth; - DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n'); + LLVM_DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n'); // Look at all the tail phis, and compute the critical path extension caused // by inserting select instructions. @@ -728,15 +729,15 @@ bool EarlyIfConverter::shouldConvertIf() { SSAIfConv::PHIInfo &PI = IfConv.PHIs[i]; unsigned Slack = TailTrace.getInstrSlack(*PI.PHI); unsigned MaxDepth = Slack + TailTrace.getInstrCycles(*PI.PHI).Depth; - DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI); + LLVM_DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI); // The condition is pulled into the critical path. 
unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles); if (CondDepth > MaxDepth) { unsigned Extra = CondDepth - MaxDepth; - DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n"); + LLVM_DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n"); if (Extra > CritLimit) { - DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); return false; } } @@ -745,9 +746,9 @@ bool EarlyIfConverter::shouldConvertIf() { unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(*PI.PHI), PI.TCycles); if (TDepth > MaxDepth) { unsigned Extra = TDepth - MaxDepth; - DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n"); + LLVM_DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n"); if (Extra > CritLimit) { - DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); return false; } } @@ -756,9 +757,9 @@ bool EarlyIfConverter::shouldConvertIf() { unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(*PI.PHI), PI.FCycles); if (FDepth > MaxDepth) { unsigned Extra = FDepth - MaxDepth; - DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n"); + LLVM_DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n"); if (Extra > CritLimit) { - DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); return false; } } @@ -783,8 +784,8 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { } bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" - << "********** Function: " << MF.getName() << '\n'); + LLVM_DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" + << "********** Function: " << MF.getName() << '\n'); if (skipFunction(MF.getFunction())) return false; diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp deleted file mode 100644 index 61ec3f4be1dc..000000000000 --- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp +++ /dev/null @@ -1,755 +0,0 @@ -//===- ExecutionDepsFix.cpp - Fix execution dependecy issues ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/ExecutionDepsFix.h" - -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/LivePhysRegs.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "execution-deps-fix" - -/// Translate TRI register number to a list of indices into our smaller tables -/// of interesting registers. -iterator_range<SmallVectorImpl<int>::const_iterator> -ExecutionDepsFix::regIndices(unsigned Reg) const { - assert(Reg < AliasMap.size() && "Invalid register"); - const auto &Entry = AliasMap[Reg]; - return make_range(Entry.begin(), Entry.end()); -} - -DomainValue *ExecutionDepsFix::alloc(int domain) { - DomainValue *dv = Avail.empty() ? 
- new(Allocator.Allocate()) DomainValue : - Avail.pop_back_val(); - if (domain >= 0) - dv->addDomain(domain); - assert(dv->Refs == 0 && "Reference count wasn't cleared"); - assert(!dv->Next && "Chained DomainValue shouldn't have been recycled"); - return dv; -} - -/// Release a reference to DV. When the last reference is released, -/// collapse if needed. -void ExecutionDepsFix::release(DomainValue *DV) { - while (DV) { - assert(DV->Refs && "Bad DomainValue"); - if (--DV->Refs) - return; - - // There are no more DV references. Collapse any contained instructions. - if (DV->AvailableDomains && !DV->isCollapsed()) - collapse(DV, DV->getFirstDomain()); - - DomainValue *Next = DV->Next; - DV->clear(); - Avail.push_back(DV); - // Also release the next DomainValue in the chain. - DV = Next; - } -} - -/// Follow the chain of dead DomainValues until a live DomainValue is reached. -/// Update the referenced pointer when necessary. -DomainValue *ExecutionDepsFix::resolve(DomainValue *&DVRef) { - DomainValue *DV = DVRef; - if (!DV || !DV->Next) - return DV; - - // DV has a chain. Find the end. - do DV = DV->Next; - while (DV->Next); - - // Update DVRef to point to DV. - retain(DV); - release(DVRef); - DVRef = DV; - return DV; -} - -/// Set LiveRegs[rx] = dv, updating reference counts. -void ExecutionDepsFix::setLiveReg(int rx, DomainValue *dv) { - assert(unsigned(rx) < NumRegs && "Invalid index"); - assert(LiveRegs && "Must enter basic block first."); - - if (LiveRegs[rx].Value == dv) - return; - if (LiveRegs[rx].Value) - release(LiveRegs[rx].Value); - LiveRegs[rx].Value = retain(dv); -} - -// Kill register rx, recycle or collapse any DomainValue. -void ExecutionDepsFix::kill(int rx) { - assert(unsigned(rx) < NumRegs && "Invalid index"); - assert(LiveRegs && "Must enter basic block first."); - if (!LiveRegs[rx].Value) - return; - - release(LiveRegs[rx].Value); - LiveRegs[rx].Value = nullptr; -} - -/// Force register rx into domain. -void ExecutionDepsFix::force(int rx, unsigned domain) { - assert(unsigned(rx) < NumRegs && "Invalid index"); - assert(LiveRegs && "Must enter basic block first."); - if (DomainValue *dv = LiveRegs[rx].Value) { - if (dv->isCollapsed()) - dv->addDomain(domain); - else if (dv->hasDomain(domain)) - collapse(dv, domain); - else { - // This is an incompatible open DomainValue. Collapse it to whatever and - // force the new value into domain. This costs a domain crossing. - collapse(dv, dv->getFirstDomain()); - assert(LiveRegs[rx].Value && "Not live after collapse?"); - LiveRegs[rx].Value->addDomain(domain); - } - } else { - // Set up basic collapsed DomainValue. - setLiveReg(rx, alloc(domain)); - } -} - -/// Collapse open DomainValue into given domain. If there are multiple -/// registers using dv, they each get a unique collapsed DomainValue. -void ExecutionDepsFix::collapse(DomainValue *dv, unsigned domain) { - assert(dv->hasDomain(domain) && "Cannot collapse"); - - // Collapse all the instructions. - while (!dv->Instrs.empty()) - TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain); - dv->setSingleDomain(domain); - - // If there are multiple users, give them new, unique DomainValues. - if (LiveRegs && dv->Refs > 1) - for (unsigned rx = 0; rx != NumRegs; ++rx) - if (LiveRegs[rx].Value == dv) - setLiveReg(rx, alloc(domain)); -} - -/// All instructions and registers in B are moved to A, and B is released. 
-bool ExecutionDepsFix::merge(DomainValue *A, DomainValue *B) { - assert(!A->isCollapsed() && "Cannot merge into collapsed"); - assert(!B->isCollapsed() && "Cannot merge from collapsed"); - if (A == B) - return true; - // Restrict to the domains that A and B have in common. - unsigned common = A->getCommonDomains(B->AvailableDomains); - if (!common) - return false; - A->AvailableDomains = common; - A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); - - // Clear the old DomainValue so we won't try to swizzle instructions twice. - B->clear(); - // All uses of B are referred to A. - B->Next = retain(A); - - for (unsigned rx = 0; rx != NumRegs; ++rx) { - assert(LiveRegs && "no space allocated for live registers"); - if (LiveRegs[rx].Value == B) - setLiveReg(rx, A); - } - return true; -} - -/// Set up LiveRegs by merging predecessor live-out values. -void ExecutionDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { - // Reset instruction counter in each basic block. - CurInstr = 0; - - // Set up UndefReads to track undefined register reads. - UndefReads.clear(); - LiveRegSet.clear(); - - // Set up LiveRegs to represent registers entering MBB. - if (!LiveRegs) - LiveRegs = new LiveReg[NumRegs]; - - // Default values are 'nothing happened a long time ago'. - for (unsigned rx = 0; rx != NumRegs; ++rx) { - LiveRegs[rx].Value = nullptr; - LiveRegs[rx].Def = -(1 << 20); - } - - // This is the entry block. - if (MBB->pred_empty()) { - for (const auto &LI : MBB->liveins()) { - for (int rx : regIndices(LI.PhysReg)) { - // Treat function live-ins as if they were defined just before the first - // instruction. Usually, function arguments are set up immediately - // before the call. - LiveRegs[rx].Def = -1; - } - } - DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n"); - return; - } - - // Try to coalesce live-out registers from predecessors. - for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), - pe = MBB->pred_end(); pi != pe; ++pi) { - auto fi = MBBInfos.find(*pi); - assert(fi != MBBInfos.end() && - "Should have pre-allocated MBBInfos for all MBBs"); - LiveReg *Incoming = fi->second.OutRegs; - // Incoming is null if this is a backedge from a BB - // we haven't processed yet - if (Incoming == nullptr) { - continue; - } - - for (unsigned rx = 0; rx != NumRegs; ++rx) { - // Use the most recent predecessor def for each register. - LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, Incoming[rx].Def); - - DomainValue *pdv = resolve(Incoming[rx].Value); - if (!pdv) - continue; - if (!LiveRegs[rx].Value) { - setLiveReg(rx, pdv); - continue; - } - - // We have a live DomainValue from more than one predecessor. - if (LiveRegs[rx].Value->isCollapsed()) { - // We are already collapsed, but predecessor is not. Force it. - unsigned Domain = LiveRegs[rx].Value->getFirstDomain(); - if (!pdv->isCollapsed() && pdv->hasDomain(Domain)) - collapse(pdv, Domain); - continue; - } - - // Currently open, merge in predecessor. - if (!pdv->isCollapsed()) - merge(LiveRegs[rx].Value, pdv); - else - force(rx, pdv->getFirstDomain()); - } - } - DEBUG( - dbgs() << printMBBReference(*MBB) - << (!isBlockDone(MBB) ? ": incomplete\n" : ": all preds known\n")); -} - -void ExecutionDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { - assert(LiveRegs && "Must enter basic block first."); - LiveReg *OldOutRegs = MBBInfos[MBB].OutRegs; - // Save register clearances at end of MBB - used by enterBasicBlock(). 
- MBBInfos[MBB].OutRegs = LiveRegs; - - // While processing the basic block, we kept `Def` relative to the start - // of the basic block for convenience. However, future use of this information - // only cares about the clearance from the end of the block, so adjust - // everything to be relative to the end of the basic block. - for (unsigned i = 0, e = NumRegs; i != e; ++i) - LiveRegs[i].Def -= CurInstr; - if (OldOutRegs) { - // This must be the second pass. - // Release all the DomainValues instead of keeping them. - for (unsigned i = 0, e = NumRegs; i != e; ++i) - release(OldOutRegs[i].Value); - delete[] OldOutRegs; - } - LiveRegs = nullptr; -} - -bool ExecutionDepsFix::visitInstr(MachineInstr *MI) { - // Update instructions with explicit execution domains. - std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI); - if (DomP.first) { - if (DomP.second) - visitSoftInstr(MI, DomP.second); - else - visitHardInstr(MI, DomP.first); - } - - return !DomP.first; -} - -/// \brief Helps avoid false dependencies on undef registers by updating the -/// machine instructions' undef operand to use a register that the instruction -/// is truly dependent on, or use a register with clearance higher than Pref. -/// Returns true if it was able to find a true dependency, thus not requiring -/// a dependency breaking instruction regardless of clearance. -bool ExecutionDepsFix::pickBestRegisterForUndef(MachineInstr *MI, - unsigned OpIdx, unsigned Pref) { - MachineOperand &MO = MI->getOperand(OpIdx); - assert(MO.isUndef() && "Expected undef machine operand"); - - unsigned OriginalReg = MO.getReg(); - - // Update only undef operands that are mapped to one register. - if (AliasMap[OriginalReg].size() != 1) - return false; - - // Get the undef operand's register class - const TargetRegisterClass *OpRC = - TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF); - - // If the instruction has a true dependency, we can hide the false depdency - // behind it. - for (MachineOperand &CurrMO : MI->operands()) { - if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() || - !OpRC->contains(CurrMO.getReg())) - continue; - // We found a true dependency - replace the undef register with the true - // dependency. - MO.setReg(CurrMO.getReg()); - return true; - } - - // Go over all registers in the register class and find the register with - // max clearance or clearance higher than Pref. - unsigned MaxClearance = 0; - unsigned MaxClearanceReg = OriginalReg; - ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC); - for (auto Reg : Order) { - assert(AliasMap[Reg].size() == 1 && - "Reg is expected to be mapped to a single index"); - int RCrx = *regIndices(Reg).begin(); - unsigned Clearance = CurInstr - LiveRegs[RCrx].Def; - if (Clearance <= MaxClearance) - continue; - MaxClearance = Clearance; - MaxClearanceReg = Reg; - - if (MaxClearance > Pref) - break; - } - - // Update the operand if we found a register with better clearance. - if (MaxClearanceReg != OriginalReg) - MO.setReg(MaxClearanceReg); - - return false; -} - -/// \brief Return true to if it makes sense to break dependence on a partial def -/// or undef use. 
-bool ExecutionDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, - unsigned Pref) { - unsigned reg = MI->getOperand(OpIdx).getReg(); - for (int rx : regIndices(reg)) { - unsigned Clearance = CurInstr - LiveRegs[rx].Def; - DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); - - if (Pref > Clearance) { - DEBUG(dbgs() << ": Break dependency.\n"); - continue; - } - DEBUG(dbgs() << ": OK .\n"); - return false; - } - return true; -} - -// Update def-ages for registers defined by MI. -// If Kill is set, also kill off DomainValues clobbered by the defs. -// -// Also break dependencies on partial defs and undef uses. -void ExecutionDepsFix::processDefs(MachineInstr *MI, bool breakDependency, - bool Kill) { - assert(!MI->isDebugValue() && "Won't process debug values"); - - // Break dependence on undef uses. Do this before updating LiveRegs below. - unsigned OpNum; - if (breakDependency) { - unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI); - if (Pref) { - bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref); - // We don't need to bother trying to break a dependency if this - // instruction has a true dependency on that register through another - // operand - we'll have to wait for it to be available regardless. - if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref)) - UndefReads.push_back(std::make_pair(MI, OpNum)); - } - } - const MCInstrDesc &MCID = MI->getDesc(); - for (unsigned i = 0, - e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); - i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - if (MO.isUse()) - continue; - for (int rx : regIndices(MO.getReg())) { - // This instruction explicitly defines rx. - DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << CurInstr - << '\t' << *MI); - - if (breakDependency) { - // Check clearance before partial register updates. - // Call breakDependence before setting LiveRegs[rx].Def. - unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI); - if (Pref && shouldBreakDependence(MI, i, Pref)) - TII->breakPartialRegDependency(*MI, i, TRI); - } - - // How many instructions since rx was last written? - LiveRegs[rx].Def = CurInstr; - - // Kill off domains redefined by generic instructions. - if (Kill) - kill(rx); - } - } - ++CurInstr; -} - -/// \break Break false dependencies on undefined register reads. -/// -/// Walk the block backward computing precise liveness. This is expensive, so we -/// only do it on demand. Note that the occurrence of undefined register reads -/// that should be broken is very rare, but when they occur we may have many in -/// a single block. -void ExecutionDepsFix::processUndefReads(MachineBasicBlock *MBB) { - if (UndefReads.empty()) - return; - - // Collect this block's live out register units. - LiveRegSet.init(*TRI); - // We do not need to care about pristine registers as they are just preserved - // but not actually used in the function. - LiveRegSet.addLiveOutsNoPristines(*MBB); - - MachineInstr *UndefMI = UndefReads.back().first; - unsigned OpIdx = UndefReads.back().second; - - for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) { - // Update liveness, including the current instruction's defs. 
- LiveRegSet.stepBackward(I); - - if (UndefMI == &I) { - if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg())) - TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI); - - UndefReads.pop_back(); - if (UndefReads.empty()) - return; - - UndefMI = UndefReads.back().first; - OpIdx = UndefReads.back().second; - } - } -} - -// A hard instruction only works in one domain. All input registers will be -// forced into that domain. -void ExecutionDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { - // Collapse all uses. - for (unsigned i = mi->getDesc().getNumDefs(), - e = mi->getDesc().getNumOperands(); i != e; ++i) { - MachineOperand &mo = mi->getOperand(i); - if (!mo.isReg()) continue; - for (int rx : regIndices(mo.getReg())) { - force(rx, domain); - } - } - - // Kill all defs and force them. - for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { - MachineOperand &mo = mi->getOperand(i); - if (!mo.isReg()) continue; - for (int rx : regIndices(mo.getReg())) { - kill(rx); - force(rx, domain); - } - } -} - -// A soft instruction can be changed to work in other domains given by mask. -void ExecutionDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { - // Bitmask of available domains for this instruction after taking collapsed - // operands into account. - unsigned available = mask; - - // Scan the explicit use operands for incoming domains. - SmallVector<int, 4> used; - if (LiveRegs) - for (unsigned i = mi->getDesc().getNumDefs(), - e = mi->getDesc().getNumOperands(); i != e; ++i) { - MachineOperand &mo = mi->getOperand(i); - if (!mo.isReg()) continue; - for (int rx : regIndices(mo.getReg())) { - DomainValue *dv = LiveRegs[rx].Value; - if (dv == nullptr) - continue; - // Bitmask of domains that dv and available have in common. - unsigned common = dv->getCommonDomains(available); - // Is it possible to use this collapsed register for free? - if (dv->isCollapsed()) { - // Restrict available domains to the ones in common with the operand. - // If there are no common domains, we must pay the cross-domain - // penalty for this operand. - if (common) available = common; - } else if (common) - // Open DomainValue is compatible, save it for merging. - used.push_back(rx); - else - // Open DomainValue is not compatible with instruction. It is useless - // now. - kill(rx); - } - } - - // If the collapsed operands force a single domain, propagate the collapse. - if (isPowerOf2_32(available)) { - unsigned domain = countTrailingZeros(available); - TII->setExecutionDomain(*mi, domain); - visitHardInstr(mi, domain); - return; - } - - // Kill off any remaining uses that don't match available, and build a list of - // incoming DomainValues that we want to merge. - SmallVector<const LiveReg *, 4> Regs; - for (int rx : used) { - assert(LiveRegs && "no space allocated for live registers"); - const LiveReg &LR = LiveRegs[rx]; - // This useless DomainValue could have been missed above. - if (!LR.Value->getCommonDomains(available)) { - kill(rx); - continue; - } - // Sorted insertion. - auto I = std::upper_bound(Regs.begin(), Regs.end(), &LR, - [](const LiveReg *LHS, const LiveReg *RHS) { - return LHS->Def < RHS->Def; - }); - Regs.insert(I, &LR); - } - - // doms are now sorted in order of appearance. Try to merge them all, giving - // priority to the latest ones. - DomainValue *dv = nullptr; - while (!Regs.empty()) { - if (!dv) { - dv = Regs.pop_back_val()->Value; - // Force the first dv to match the current instruction. 
- dv->AvailableDomains = dv->getCommonDomains(available); - assert(dv->AvailableDomains && "Domain should have been filtered"); - continue; - } - - DomainValue *Latest = Regs.pop_back_val()->Value; - // Skip already merged values. - if (Latest == dv || Latest->Next) - continue; - if (merge(dv, Latest)) - continue; - - // If latest didn't merge, it is useless now. Kill all registers using it. - for (int i : used) { - assert(LiveRegs && "no space allocated for live registers"); - if (LiveRegs[i].Value == Latest) - kill(i); - } - } - - // dv is the DomainValue we are going to use for this instruction. - if (!dv) { - dv = alloc(); - dv->AvailableDomains = available; - } - dv->Instrs.push_back(mi); - - // Finally set all defs and non-collapsed uses to dv. We must iterate through - // all the operators, including imp-def ones. - for (MachineInstr::mop_iterator ii = mi->operands_begin(), - ee = mi->operands_end(); - ii != ee; ++ii) { - MachineOperand &mo = *ii; - if (!mo.isReg()) continue; - for (int rx : regIndices(mo.getReg())) { - if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { - kill(rx); - setLiveReg(rx, dv); - } - } - } -} - -void ExecutionDepsFix::processBasicBlock(MachineBasicBlock *MBB, - bool PrimaryPass) { - enterBasicBlock(MBB); - // If this block is not done, it makes little sense to make any decisions - // based on clearance information. We need to make a second pass anyway, - // and by then we'll have better information, so we can avoid doing the work - // to try and break dependencies now. - bool breakDependency = isBlockDone(MBB); - for (MachineInstr &MI : *MBB) { - if (!MI.isDebugValue()) { - bool Kill = false; - if (PrimaryPass) - Kill = visitInstr(&MI); - processDefs(&MI, breakDependency, Kill); - } - } - if (breakDependency) - processUndefReads(MBB); - leaveBasicBlock(MBB); -} - -bool ExecutionDepsFix::isBlockDone(MachineBasicBlock *MBB) { - return MBBInfos[MBB].PrimaryCompleted && - MBBInfos[MBB].IncomingCompleted == MBBInfos[MBB].PrimaryIncoming && - MBBInfos[MBB].IncomingProcessed == MBB->pred_size(); -} - -bool ExecutionDepsFix::runOnMachineFunction(MachineFunction &mf) { - if (skipFunction(mf.getFunction())) - return false; - MF = &mf; - TII = MF->getSubtarget().getInstrInfo(); - TRI = MF->getSubtarget().getRegisterInfo(); - RegClassInfo.runOnMachineFunction(mf); - LiveRegs = nullptr; - assert(NumRegs == RC->getNumRegs() && "Bad regclass"); - - DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: " - << TRI->getRegClassName(RC) << " **********\n"); - - // If no relevant registers are used in the function, we can skip it - // completely. - bool anyregs = false; - const MachineRegisterInfo &MRI = mf.getRegInfo(); - for (unsigned Reg : *RC) { - if (MRI.isPhysRegUsed(Reg)) { - anyregs = true; - break; - } - } - if (!anyregs) return false; - - // Initialize the AliasMap on the first use. - if (AliasMap.empty()) { - // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and - // therefore the LiveRegs array. - AliasMap.resize(TRI->getNumRegs()); - for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) - for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); - AI.isValid(); ++AI) - AliasMap[*AI].push_back(i); - } - - // Initialize the MMBInfos - for (auto &MBB : mf) { - MBBInfo InitialInfo; - MBBInfos.insert(std::make_pair(&MBB, InitialInfo)); - } - - /* - * We want to visit every instruction in every basic block in order to update - * it's execution domain or break any false dependencies. 
However, for the - * dependency breaking, we need to know clearances from all predecessors - * (including any backedges). One way to do so would be to do two complete - * passes over all basic blocks/instructions, the first for recording - * clearances, the second to break the dependencies. However, for functions - * without backedges, or functions with a lot of straight-line code, and - * a small loop, that would be a lot of unnecessary work (since only the - * BBs that are part of the loop require two passes). As an example, - * consider the following loop. - * - * - * PH -> A -> B (xmm<Undef> -> xmm<Def>) -> C -> D -> EXIT - * ^ | - * +----------------------------------+ - * - * The iteration order is as follows: - * Naive: PH A B C D A' B' C' D' - * Optimized: PH A B C A' B' C' D - * - * Note that we avoid processing D twice, because we can entirely process - * the predecessors before getting to D. We call a block that is ready - * for its second round of processing `done` (isBlockDone). Once we finish - * processing some block, we update the counters in MBBInfos and re-process - * any successors that are now done. - */ - - MachineBasicBlock *Entry = &*MF->begin(); - ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry); - SmallVector<MachineBasicBlock *, 4> Workqueue; - for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator - MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { - MachineBasicBlock *MBB = *MBBI; - // N.B: IncomingProcessed and IncomingCompleted were already updated while - // processing this block's predecessors. - MBBInfos[MBB].PrimaryCompleted = true; - MBBInfos[MBB].PrimaryIncoming = MBBInfos[MBB].IncomingProcessed; - bool Primary = true; - Workqueue.push_back(MBB); - while (!Workqueue.empty()) { - MachineBasicBlock *ActiveMBB = &*Workqueue.back(); - Workqueue.pop_back(); - processBasicBlock(ActiveMBB, Primary); - bool Done = isBlockDone(ActiveMBB); - for (auto *Succ : ActiveMBB->successors()) { - if (!isBlockDone(Succ)) { - if (Primary) { - MBBInfos[Succ].IncomingProcessed++; - } - if (Done) { - MBBInfos[Succ].IncomingCompleted++; - } - if (isBlockDone(Succ)) { - Workqueue.push_back(Succ); - } - } - } - Primary = false; - } - } - - // We need to go through again and finalize any blocks that are not done yet. - // This is possible if blocks have dead predecessors, so we didn't visit them - // above. - for (ReversePostOrderTraversal<MachineBasicBlock *>::rpo_iterator - MBBI = RPOT.begin(), - MBBE = RPOT.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock *MBB = *MBBI; - if (!isBlockDone(MBB)) { - processBasicBlock(MBB, false); - // Don't update successors here. We'll get to them anyway through this - // loop. - } - } - - // Clear the LiveOuts vectors and collapse any remaining DomainValues. 
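For reference, the scheduling trick described in the comment above (and carried over into the LoopTraversal helper that the rewritten passes use) can be reproduced in isolation: each block gets a primary pass in reverse post-order and is re-processed as soon as all of its predecessors have been processed. The sketch below runs that bookkeeping over a hard-coded toy CFG with a back edge C -> A; the Block/BlockInfo types and all names are illustrative only, not LLVM API.

// Toy re-implementation of the "done block" scheduling above.
#include <cstdio>
#include <string>
#include <vector>

struct Block {
  std::string Name;
  std::vector<int> Preds, Succs;
};

struct BlockInfo {
  bool PrimaryCompleted = false;
  int PrimaryIncoming = 0;    // IncomingProcessed at primary-pass time.
  int IncomingProcessed = 0;  // Predecessors whose primary pass has run.
  int IncomingCompleted = 0;  // Predecessors that are fully done.
};

int main() {
  // PH -> A -> B -> C -> D -> EXIT, with a back edge C -> A.
  std::vector<Block> CFG = {{"PH", {}, {1}},   {"A", {0, 3}, {2}},
                            {"B", {1}, {3}},   {"C", {2}, {1, 4}},
                            {"D", {3}, {5}},   {"EXIT", {4}, {}}};
  std::vector<BlockInfo> Info(CFG.size());

  auto isDone = [&](int Idx) {
    return Info[Idx].PrimaryCompleted &&
           Info[Idx].IncomingCompleted == Info[Idx].PrimaryIncoming &&
           Info[Idx].IncomingProcessed == (int)CFG[Idx].Preds.size();
  };

  std::string Order;
  // Assume the vector index order already is a reverse post-order.
  for (int B = 0, E = (int)CFG.size(); B != E; ++B) {
    Info[B].PrimaryCompleted = true;
    Info[B].PrimaryIncoming = Info[B].IncomingProcessed;
    bool Primary = true;
    std::vector<int> Work{B};
    while (!Work.empty()) {
      int Cur = Work.back();
      Work.pop_back();
      Order += CFG[Cur].Name + (Primary ? " " : "' ");
      bool Done = isDone(Cur);
      for (int Succ : CFG[Cur].Succs) {
        if (isDone(Succ))
          continue;
        if (Primary)
          ++Info[Succ].IncomingProcessed;
        if (Done)
          ++Info[Succ].IncomingCompleted;
        if (isDone(Succ))
          Work.push_back(Succ); // All inputs known: safe to re-process now.
      }
      Primary = false;
    }
  }
  // The real pass also finalizes blocks that never became "done" (e.g. blocks
  // with dead predecessors); that extra loop is omitted here.
  std::printf("%s\n", Order.c_str()); // PH A B C A' B' C' D EXIT
  return 0;
}

Blocks re-processed from the work queue are printed with a prime, so the run reproduces the "Optimized" order from the comment: D and EXIT are only visited once because all of their predecessors were finished before they were reached.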
- for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator - MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { - auto FI = MBBInfos.find(*MBBI); - if (FI == MBBInfos.end() || !FI->second.OutRegs) - continue; - for (unsigned i = 0, e = NumRegs; i != e; ++i) - if (FI->second.OutRegs[i].Value) - release(FI->second.OutRegs[i].Value); - delete[] FI->second.OutRegs; - } - MBBInfos.clear(); - UndefReads.clear(); - Avail.clear(); - Allocator.DestroyAll(); - - return false; -} diff --git a/contrib/llvm/lib/CodeGen/ExecutionDomainFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDomainFix.cpp new file mode 100644 index 000000000000..458dcf2b0e26 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ExecutionDomainFix.cpp @@ -0,0 +1,473 @@ +//===- ExecutionDomainFix.cpp - Fix execution domain issues ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ExecutionDomainFix.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "execution-deps-fix" + +iterator_range<SmallVectorImpl<int>::const_iterator> +ExecutionDomainFix::regIndices(unsigned Reg) const { + assert(Reg < AliasMap.size() && "Invalid register"); + const auto &Entry = AliasMap[Reg]; + return make_range(Entry.begin(), Entry.end()); +} + +DomainValue *ExecutionDomainFix::alloc(int domain) { + DomainValue *dv = Avail.empty() ? new (Allocator.Allocate()) DomainValue + : Avail.pop_back_val(); + if (domain >= 0) + dv->addDomain(domain); + assert(dv->Refs == 0 && "Reference count wasn't cleared"); + assert(!dv->Next && "Chained DomainValue shouldn't have been recycled"); + return dv; +} + +void ExecutionDomainFix::release(DomainValue *DV) { + while (DV) { + assert(DV->Refs && "Bad DomainValue"); + if (--DV->Refs) + return; + + // There are no more DV references. Collapse any contained instructions. + if (DV->AvailableDomains && !DV->isCollapsed()) + collapse(DV, DV->getFirstDomain()); + + DomainValue *Next = DV->Next; + DV->clear(); + Avail.push_back(DV); + // Also release the next DomainValue in the chain. + DV = Next; + } +} + +DomainValue *ExecutionDomainFix::resolve(DomainValue *&DVRef) { + DomainValue *DV = DVRef; + if (!DV || !DV->Next) + return DV; + + // DV has a chain. Find the end. + do + DV = DV->Next; + while (DV->Next); + + // Update DVRef to point to DV. 
+ retain(DV); + release(DVRef); + DVRef = DV; + return DV; +} + +void ExecutionDomainFix::setLiveReg(int rx, DomainValue *dv) { + assert(unsigned(rx) < NumRegs && "Invalid index"); + assert(!LiveRegs.empty() && "Must enter basic block first."); + + if (LiveRegs[rx] == dv) + return; + if (LiveRegs[rx]) + release(LiveRegs[rx]); + LiveRegs[rx] = retain(dv); +} + +void ExecutionDomainFix::kill(int rx) { + assert(unsigned(rx) < NumRegs && "Invalid index"); + assert(!LiveRegs.empty() && "Must enter basic block first."); + if (!LiveRegs[rx]) + return; + + release(LiveRegs[rx]); + LiveRegs[rx] = nullptr; +} + +void ExecutionDomainFix::force(int rx, unsigned domain) { + assert(unsigned(rx) < NumRegs && "Invalid index"); + assert(!LiveRegs.empty() && "Must enter basic block first."); + if (DomainValue *dv = LiveRegs[rx]) { + if (dv->isCollapsed()) + dv->addDomain(domain); + else if (dv->hasDomain(domain)) + collapse(dv, domain); + else { + // This is an incompatible open DomainValue. Collapse it to whatever and + // force the new value into domain. This costs a domain crossing. + collapse(dv, dv->getFirstDomain()); + assert(LiveRegs[rx] && "Not live after collapse?"); + LiveRegs[rx]->addDomain(domain); + } + } else { + // Set up basic collapsed DomainValue. + setLiveReg(rx, alloc(domain)); + } +} + +void ExecutionDomainFix::collapse(DomainValue *dv, unsigned domain) { + assert(dv->hasDomain(domain) && "Cannot collapse"); + + // Collapse all the instructions. + while (!dv->Instrs.empty()) + TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain); + dv->setSingleDomain(domain); + + // If there are multiple users, give them new, unique DomainValues. + if (!LiveRegs.empty() && dv->Refs > 1) + for (unsigned rx = 0; rx != NumRegs; ++rx) + if (LiveRegs[rx] == dv) + setLiveReg(rx, alloc(domain)); +} + +bool ExecutionDomainFix::merge(DomainValue *A, DomainValue *B) { + assert(!A->isCollapsed() && "Cannot merge into collapsed"); + assert(!B->isCollapsed() && "Cannot merge from collapsed"); + if (A == B) + return true; + // Restrict to the domains that A and B have in common. + unsigned common = A->getCommonDomains(B->AvailableDomains); + if (!common) + return false; + A->AvailableDomains = common; + A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); + + // Clear the old DomainValue so we won't try to swizzle instructions twice. + B->clear(); + // All uses of B are referred to A. + B->Next = retain(A); + + for (unsigned rx = 0; rx != NumRegs; ++rx) { + assert(!LiveRegs.empty() && "no space allocated for live registers"); + if (LiveRegs[rx] == B) + setLiveReg(rx, A); + } + return true; +} + +void ExecutionDomainFix::enterBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + + MachineBasicBlock *MBB = TraversedMBB.MBB; + + // Set up LiveRegs to represent registers entering MBB. + // Set default domain values to 'no domain' (nullptr) + if (LiveRegs.empty()) + LiveRegs.assign(NumRegs, nullptr); + + // This is the entry block. + if (MBB->pred_empty()) { + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n"); + return; + } + + // Try to coalesce live-out registers from predecessors. 
+ for (MachineBasicBlock *pred : MBB->predecessors()) { + assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() && + "Should have pre-allocated MBBInfos for all MBBs"); + LiveRegsDVInfo &Incoming = MBBOutRegsInfos[pred->getNumber()]; + // Incoming is null if this is a backedge from a BB + // we haven't processed yet + if (Incoming.empty()) + continue; + + for (unsigned rx = 0; rx != NumRegs; ++rx) { + DomainValue *pdv = resolve(Incoming[rx]); + if (!pdv) + continue; + if (!LiveRegs[rx]) { + setLiveReg(rx, pdv); + continue; + } + + // We have a live DomainValue from more than one predecessor. + if (LiveRegs[rx]->isCollapsed()) { + // We are already collapsed, but predecessor is not. Force it. + unsigned Domain = LiveRegs[rx]->getFirstDomain(); + if (!pdv->isCollapsed() && pdv->hasDomain(Domain)) + collapse(pdv, Domain); + continue; + } + + // Currently open, merge in predecessor. + if (!pdv->isCollapsed()) + merge(LiveRegs[rx], pdv); + else + force(rx, pdv->getFirstDomain()); + } + } + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) + << (!TraversedMBB.IsDone ? ": incomplete\n" + : ": all preds known\n")); +} + +void ExecutionDomainFix::leaveBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + assert(!LiveRegs.empty() && "Must enter basic block first."); + unsigned MBBNumber = TraversedMBB.MBB->getNumber(); + assert(MBBNumber < MBBOutRegsInfos.size() && + "Unexpected basic block number."); + // Save register clearances at end of MBB - used by enterBasicBlock(). + for (DomainValue *OldLiveReg : MBBOutRegsInfos[MBBNumber]) { + release(OldLiveReg); + } + MBBOutRegsInfos[MBBNumber] = LiveRegs; + LiveRegs.clear(); +} + +bool ExecutionDomainFix::visitInstr(MachineInstr *MI) { + // Update instructions with explicit execution domains. + std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI); + if (DomP.first) { + if (DomP.second) + visitSoftInstr(MI, DomP.second); + else + visitHardInstr(MI, DomP.first); + } + + return !DomP.first; +} + +void ExecutionDomainFix::processDefs(MachineInstr *MI, bool Kill) { + assert(!MI->isDebugInstr() && "Won't process debug values"); + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, + e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isUse()) + continue; + for (int rx : regIndices(MO.getReg())) { + // This instruction explicitly defines rx. + LLVM_DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << *MI); + + // Kill off domains redefined by generic instructions. + if (Kill) + kill(rx); + } + } +} + +void ExecutionDomainFix::visitHardInstr(MachineInstr *mi, unsigned domain) { + // Collapse all uses. + for (unsigned i = mi->getDesc().getNumDefs(), + e = mi->getDesc().getNumOperands(); + i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) + continue; + for (int rx : regIndices(mo.getReg())) { + force(rx, domain); + } + } + + // Kill all defs and force them. + for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) + continue; + for (int rx : regIndices(mo.getReg())) { + kill(rx); + force(rx, domain); + } + } +} + +void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { + // Bitmask of available domains for this instruction after taking collapsed + // operands into account. + unsigned available = mask; + + // Scan the explicit use operands for incoming domains. 
+ SmallVector<int, 4> used; + if (!LiveRegs.empty()) + for (unsigned i = mi->getDesc().getNumDefs(), + e = mi->getDesc().getNumOperands(); + i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) + continue; + for (int rx : regIndices(mo.getReg())) { + DomainValue *dv = LiveRegs[rx]; + if (dv == nullptr) + continue; + // Bitmask of domains that dv and available have in common. + unsigned common = dv->getCommonDomains(available); + // Is it possible to use this collapsed register for free? + if (dv->isCollapsed()) { + // Restrict available domains to the ones in common with the operand. + // If there are no common domains, we must pay the cross-domain + // penalty for this operand. + if (common) + available = common; + } else if (common) + // Open DomainValue is compatible, save it for merging. + used.push_back(rx); + else + // Open DomainValue is not compatible with instruction. It is useless + // now. + kill(rx); + } + } + + // If the collapsed operands force a single domain, propagate the collapse. + if (isPowerOf2_32(available)) { + unsigned domain = countTrailingZeros(available); + TII->setExecutionDomain(*mi, domain); + visitHardInstr(mi, domain); + return; + } + + // Kill off any remaining uses that don't match available, and build a list of + // incoming DomainValues that we want to merge. + SmallVector<int, 4> Regs; + for (int rx : used) { + assert(!LiveRegs.empty() && "no space allocated for live registers"); + DomainValue *&LR = LiveRegs[rx]; + // This useless DomainValue could have been missed above. + if (!LR->getCommonDomains(available)) { + kill(rx); + continue; + } + // Sorted insertion. + // Enables giving priority to the latest domains during merging. + auto I = std::upper_bound( + Regs.begin(), Regs.end(), rx, [&](int LHS, const int RHS) { + return RDA->getReachingDef(mi, RC->getRegister(LHS)) < + RDA->getReachingDef(mi, RC->getRegister(RHS)); + }); + Regs.insert(I, rx); + } + + // doms are now sorted in order of appearance. Try to merge them all, giving + // priority to the latest ones. + DomainValue *dv = nullptr; + while (!Regs.empty()) { + if (!dv) { + dv = LiveRegs[Regs.pop_back_val()]; + // Force the first dv to match the current instruction. + dv->AvailableDomains = dv->getCommonDomains(available); + assert(dv->AvailableDomains && "Domain should have been filtered"); + continue; + } + + DomainValue *Latest = LiveRegs[Regs.pop_back_val()]; + // Skip already merged values. + if (Latest == dv || Latest->Next) + continue; + if (merge(dv, Latest)) + continue; + + // If latest didn't merge, it is useless now. Kill all registers using it. + for (int i : used) { + assert(!LiveRegs.empty() && "no space allocated for live registers"); + if (LiveRegs[i] == Latest) + kill(i); + } + } + + // dv is the DomainValue we are going to use for this instruction. + if (!dv) { + dv = alloc(); + dv->AvailableDomains = available; + } + dv->Instrs.push_back(mi); + + // Finally set all defs and non-collapsed uses to dv. We must iterate through + // all the operators, including imp-def ones. + for (MachineOperand &mo : mi->operands()) { + if (!mo.isReg()) + continue; + for (int rx : regIndices(mo.getReg())) { + if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx] != dv)) { + kill(rx); + setLiveReg(rx, dv); + } + } + } +} + +void ExecutionDomainFix::processBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + enterBasicBlock(TraversedMBB); + // If this block is not done, it makes little sense to make any decisions + // based on clearance information. 
We need to make a second pass anyway, + // and by then we'll have better information, so we can avoid doing the work + // to try and break dependencies now. + for (MachineInstr &MI : *TraversedMBB.MBB) { + if (!MI.isDebugInstr()) { + bool Kill = false; + if (TraversedMBB.PrimaryPass) + Kill = visitInstr(&MI); + processDefs(&MI, Kill); + } + } + leaveBasicBlock(TraversedMBB); +} + +bool ExecutionDomainFix::runOnMachineFunction(MachineFunction &mf) { + if (skipFunction(mf.getFunction())) + return false; + MF = &mf; + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + LiveRegs.clear(); + assert(NumRegs == RC->getNumRegs() && "Bad regclass"); + + LLVM_DEBUG(dbgs() << "********** FIX EXECUTION DOMAIN: " + << TRI->getRegClassName(RC) << " **********\n"); + + // If no relevant registers are used in the function, we can skip it + // completely. + bool anyregs = false; + const MachineRegisterInfo &MRI = mf.getRegInfo(); + for (unsigned Reg : *RC) { + if (MRI.isPhysRegUsed(Reg)) { + anyregs = true; + break; + } + } + if (!anyregs) + return false; + + RDA = &getAnalysis<ReachingDefAnalysis>(); + + // Initialize the AliasMap on the first use. + if (AliasMap.empty()) { + // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and + // therefore the LiveRegs array. + AliasMap.resize(TRI->getNumRegs()); + for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) + for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); AI.isValid(); + ++AI) + AliasMap[*AI].push_back(i); + } + + // Initialize the MBBOutRegsInfos + MBBOutRegsInfos.resize(mf.getNumBlockIDs()); + + // Traverse the basic blocks. + LoopTraversal Traversal; + LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf); + for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) { + processBasicBlock(TraversedMBB); + } + + for (LiveRegsDVInfo OutLiveRegs : MBBOutRegsInfos) { + for (DomainValue *OutLiveReg : OutLiveRegs) { + if (OutLiveReg) + release(OutLiveReg); + } + } + MBBOutRegsInfos.clear(); + Avail.clear(); + Allocator.DestroyAll(); + + return false; +} diff --git a/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp index 09c808463a41..d7562cbf1e90 100644 --- a/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -32,7 +32,7 @@ STATISTIC(NumMemCmpGreaterThanMax, "Number of memcmp calls with size greater than max size"); STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls"); -static cl::opt<unsigned> MemCmpNumLoadsPerBlock( +static cl::opt<unsigned> MemCmpEqZeroNumLoadsPerBlock( "memcmp-num-loads-per-block", cl::Hidden, cl::init(1), cl::desc("The number of loads per basic block for inline expansion of " "memcmp that is only being compared against zero.")); @@ -56,7 +56,7 @@ class MemCmpExpansion { const uint64_t Size; unsigned MaxLoadSize; uint64_t NumLoadsNonOneByte; - const uint64_t NumLoadsPerBlock; + const uint64_t NumLoadsPerBlockForZeroCmp; std::vector<BasicBlock *> LoadCmpBlocks; BasicBlock *EndBlock; PHINode *PhiRes; @@ -102,7 +102,7 @@ class MemCmpExpansion { MemCmpExpansion(CallInst *CI, uint64_t Size, const TargetTransformInfo::MemCmpExpansionOptions &Options, unsigned MaxNumLoads, const bool IsUsedForZeroCmp, - unsigned NumLoadsPerBlock, const DataLayout &DL); + unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout); unsigned getNumBlocks(); uint64_t getNumLoads() const { return LoadSequence.size(); } @@ -122,12 +122,12 @@ 
MemCmpExpansion::MemCmpExpansion( CallInst *const CI, uint64_t Size, const TargetTransformInfo::MemCmpExpansionOptions &Options, const unsigned MaxNumLoads, const bool IsUsedForZeroCmp, - const unsigned NumLoadsPerBlock, const DataLayout &TheDataLayout) + const unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout) : CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0), - NumLoadsPerBlock(NumLoadsPerBlock), + NumLoadsPerBlockForZeroCmp(MaxLoadsPerBlockForZeroCmp), IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), Builder(CI) { @@ -171,8 +171,8 @@ MemCmpExpansion::MemCmpExpansion( unsigned MemCmpExpansion::getNumBlocks() { if (IsUsedForZeroCmp) - return getNumLoads() / NumLoadsPerBlock + - (getNumLoads() % NumLoadsPerBlock != 0 ? 1 : 0); + return getNumLoads() / NumLoadsPerBlockForZeroCmp + + (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0); return getNumLoads(); } @@ -249,7 +249,7 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex, Value *Diff; const unsigned NumLoads = - std::min(getNumLoads() - LoadIndex, NumLoadsPerBlock); + std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp); // For a single-block expansion, start inserting before the memcmp call. if (LoadCmpBlocks.empty()) @@ -519,8 +519,6 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() { /// A memcmp expansion that only has one block of load and compare can bypass /// the compare, branch, and phi IR that is required in the general case. Value *MemCmpExpansion::getMemCmpOneBlock() { - assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block"); - Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); Value *Source1 = CI->getArgOperand(0); Value *Source2 = CI->getArgOperand(1); @@ -566,11 +564,8 @@ Value *MemCmpExpansion::getMemCmpOneBlock() { // This function expands the memcmp call into an inline expansion and returns // the memcmp result. Value *MemCmpExpansion::getMemCmpExpansion() { - // A memcmp with zero-comparison with only one block of load and compare does - // not need to set up any extra blocks. This case could be handled in the DAG, - // but since we have all of the machinery to flexibly expand any memcpy here, - // we choose to handle this case too to avoid fragmented lowering. - if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || getNumBlocks() != 1) { + // Create the basic block framework for a multi-block expansion. + if (getNumBlocks() != 1) { BasicBlock *StartBlock = CI->getParent(); EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); setupEndBlockPHINodes(); @@ -596,8 +591,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() { return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock() : getMemCmpExpansionZeroCase(); - // TODO: Handle more than one load pair per block in getMemCmpOneBlock(). - if (getNumBlocks() == 1 && NumLoadsPerBlock == 1) return getMemCmpOneBlock(); + if (getNumBlocks() == 1) + return getMemCmpOneBlock(); for (unsigned I = 0; I < getNumBlocks(); ++I) { emitLoadCompareBlock(I); @@ -709,8 +704,12 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, const unsigned MaxNumLoads = TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize()); + unsigned NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences() + ? 
MemCmpEqZeroNumLoadsPerBlock + : TLI->getMemcmpEqZeroLoadsPerBlock(); + MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads, - IsUsedForZeroCmp, MemCmpNumLoadsPerBlock, *DL); + IsUsedForZeroCmp, NumLoadsPerBlock, *DL); // Don't expand if this will require more loads than desired by the target. if (Expansion.getNumLoads() == 0) { diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index 6ef97d6dd5ec..bc747fc610f8 100644 --- a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -93,11 +93,11 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && "Inserted value must be in a physical register"); - DEBUG(dbgs() << "subreg: CONVERTING: " << *MI); + LLVM_DEBUG(dbgs() << "subreg: CONVERTING: " << *MI); if (MI->allDefsAreDead()) { MI->setDesc(TII->get(TargetOpcode::KILL)); - DEBUG(dbgs() << "subreg: replaced by: " << *MI); + LLVM_DEBUG(dbgs() << "subreg: replaced by: " << *MI); return true; } @@ -110,10 +110,10 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { MI->setDesc(TII->get(TargetOpcode::KILL)); MI->RemoveOperand(3); // SubIdx MI->RemoveOperand(1); // Imm - DEBUG(dbgs() << "subreg: replace by: " << *MI); + LLVM_DEBUG(dbgs() << "subreg: replace by: " << *MI); return true; } - DEBUG(dbgs() << "subreg: eliminated!"); + LLVM_DEBUG(dbgs() << "subreg: eliminated!"); } else { TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg, MI->getOperand(2).isKill()); @@ -122,10 +122,10 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { MachineBasicBlock::iterator CopyMI = MI; --CopyMI; CopyMI->addRegisterDefined(DstReg); - DEBUG(dbgs() << "subreg: " << *CopyMI); + LLVM_DEBUG(dbgs() << "subreg: " << *CopyMI); } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); MBB->erase(MI); return true; } @@ -133,9 +133,9 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { bool ExpandPostRA::LowerCopy(MachineInstr *MI) { if (MI->allDefsAreDead()) { - DEBUG(dbgs() << "dead copy: " << *MI); + LLVM_DEBUG(dbgs() << "dead copy: " << *MI); MI->setDesc(TII->get(TargetOpcode::KILL)); - DEBUG(dbgs() << "replaced by: " << *MI); + LLVM_DEBUG(dbgs() << "replaced by: " << *MI); return true; } @@ -144,14 +144,15 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) { bool IdentityCopy = (SrcMO.getReg() == DstMO.getReg()); if (IdentityCopy || SrcMO.isUndef()) { - DEBUG(dbgs() << (IdentityCopy ? "identity copy: " : "undef copy: ") << *MI); + LLVM_DEBUG(dbgs() << (IdentityCopy ? "identity copy: " : "undef copy: ") + << *MI); // No need to insert an identity copy instruction, but replace with a KILL // if liveness is changed. if (SrcMO.isUndef() || MI->getNumOperands() > 2) { // We must make sure the super-register gets killed. Replace the // instruction with KILL. MI->setDesc(TII->get(TargetOpcode::KILL)); - DEBUG(dbgs() << "replaced by: " << *MI); + LLVM_DEBUG(dbgs() << "replaced by: " << *MI); return true; } // Vanilla identity copy. 
@@ -159,13 +160,13 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) { return true; } - DEBUG(dbgs() << "real copy: " << *MI); + LLVM_DEBUG(dbgs() << "real copy: " << *MI); TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(), DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill()); if (MI->getNumOperands() > 2) TransferImplicitOperands(MI); - DEBUG({ + LLVM_DEBUG({ MachineBasicBlock::iterator dMI = MI; dbgs() << "replaced by: " << *(--dMI); }); @@ -177,9 +178,9 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) { /// copies. /// bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "Machine Function\n" - << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n" - << "********** Function: " << MF.getName() << '\n'); + LLVM_DEBUG(dbgs() << "Machine Function\n" + << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n" + << "********** Function: " << MF.getName() << '\n'); TRI = MF.getSubtarget().getRegisterInfo(); TII = MF.getSubtarget().getInstrInfo(); diff --git a/contrib/llvm/lib/CodeGen/ExpandReductions.cpp b/contrib/llvm/lib/CodeGen/ExpandReductions.cpp index abf487a4f198..7552ba8cd85d 100644 --- a/contrib/llvm/lib/CodeGen/ExpandReductions.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandReductions.cpp @@ -78,13 +78,15 @@ RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) { bool expandReductions(Function &F, const TargetTransformInfo *TTI) { bool Changed = false; - SmallVector<IntrinsicInst*, 4> Worklist; + SmallVector<IntrinsicInst *, 4> Worklist; for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) if (auto II = dyn_cast<IntrinsicInst>(&*I)) Worklist.push_back(II); for (auto *II : Worklist) { IRBuilder<> Builder(II); + bool IsOrdered = false; + Value *Acc = nullptr; Value *Vec = nullptr; auto ID = II->getIntrinsicID(); auto MRK = RecurrenceDescriptor::MRK_Invalid; @@ -92,11 +94,10 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { case Intrinsic::experimental_vector_reduce_fadd: case Intrinsic::experimental_vector_reduce_fmul: // FMFs must be attached to the call, otherwise it's an ordered reduction - // and it can't be handled by generating this shuffle sequence. - // TODO: Implement scalarization of ordered reductions here for targets - // without native support. + // and it can't be handled by generating a shuffle sequence. if (!II->getFastMathFlags().isFast()) - continue; + IsOrdered = true; + Acc = II->getArgOperand(0); Vec = II->getArgOperand(1); break; case Intrinsic::experimental_vector_reduce_add: @@ -118,7 +119,9 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { } if (!TTI->shouldExpandReduction(II)) continue; - auto Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); + Value *Rdx = + IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK) + : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); II->replaceAllUsesWith(Rdx); II->eraseFromParent(); Changed = true; diff --git a/contrib/llvm/lib/CodeGen/FaultMaps.cpp b/contrib/llvm/lib/CodeGen/FaultMaps.cpp index 2924b011e0c1..361558a0e562 100644 --- a/contrib/llvm/lib/CodeGen/FaultMaps.cpp +++ b/contrib/llvm/lib/CodeGen/FaultMaps.cpp @@ -62,17 +62,17 @@ void FaultMaps::serializeToFaultMapSection() { // Emit a dummy symbol to force section inclusion. OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_FaultMaps"))); - DEBUG(dbgs() << "********** Fault Map Output **********\n"); + LLVM_DEBUG(dbgs() << "********** Fault Map Output **********\n"); // Header OS.EmitIntValue(FaultMapVersion, 1); // Version. 
OS.EmitIntValue(0, 1); // Reserved. OS.EmitIntValue(0, 2); // Reserved. - DEBUG(dbgs() << WFMP << "#functions = " << FunctionInfos.size() << "\n"); + LLVM_DEBUG(dbgs() << WFMP << "#functions = " << FunctionInfos.size() << "\n"); OS.EmitIntValue(FunctionInfos.size(), 4); - DEBUG(dbgs() << WFMP << "functions:\n"); + LLVM_DEBUG(dbgs() << WFMP << "functions:\n"); for (const auto &FFI : FunctionInfos) emitFunctionInfo(FFI.first, FFI.second); @@ -82,25 +82,25 @@ void FaultMaps::emitFunctionInfo(const MCSymbol *FnLabel, const FunctionFaultInfos &FFI) { MCStreamer &OS = *AP.OutStreamer; - DEBUG(dbgs() << WFMP << " function addr: " << *FnLabel << "\n"); + LLVM_DEBUG(dbgs() << WFMP << " function addr: " << *FnLabel << "\n"); OS.EmitSymbolValue(FnLabel, 8); - DEBUG(dbgs() << WFMP << " #faulting PCs: " << FFI.size() << "\n"); + LLVM_DEBUG(dbgs() << WFMP << " #faulting PCs: " << FFI.size() << "\n"); OS.EmitIntValue(FFI.size(), 4); OS.EmitIntValue(0, 4); // Reserved for (auto &Fault : FFI) { - DEBUG(dbgs() << WFMP << " fault type: " - << faultTypeToString(Fault.Kind) << "\n"); + LLVM_DEBUG(dbgs() << WFMP << " fault type: " + << faultTypeToString(Fault.Kind) << "\n"); OS.EmitIntValue(Fault.Kind, 4); - DEBUG(dbgs() << WFMP << " faulting PC offset: " - << *Fault.FaultingOffsetExpr << "\n"); + LLVM_DEBUG(dbgs() << WFMP << " faulting PC offset: " + << *Fault.FaultingOffsetExpr << "\n"); OS.EmitValue(Fault.FaultingOffsetExpr, 4); - DEBUG(dbgs() << WFMP << " fault handler PC offset: " - << *Fault.HandlerOffsetExpr << "\n"); + LLVM_DEBUG(dbgs() << WFMP << " fault handler PC offset: " + << *Fault.HandlerOffsetExpr << "\n"); OS.EmitValue(Fault.HandlerOffsetExpr, 4); } } diff --git a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp index 9c71b18619a1..581cd423f2d4 100644 --- a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp +++ b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp @@ -41,8 +41,11 @@ INITIALIZE_PASS(FuncletLayout, DEBUG_TYPE, "Contiguously Lay Out Funclets", false, false) bool FuncletLayout::runOnMachineFunction(MachineFunction &F) { + // Even though this gets information from getEHScopeMembership(), this pass is + // only necessary for funclet-based EH personalities, in which these EH scopes + // are outlined at the end. DenseMap<const MachineBasicBlock *, int> FuncletMembership = - getFuncletMembership(F); + getEHScopeMembership(F); if (FuncletMembership.empty()) return false; diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp index 4361d8b248c8..31ddeadbd97a 100644 --- a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp +++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp @@ -38,7 +38,7 @@ namespace { /// directed by the GCStrategy. It also performs automatic root initialization /// and custom intrinsic lowering. 
class LowerIntrinsics : public FunctionPass { - bool PerformDefaultLowering(Function &F, GCStrategy &Coll); + bool PerformDefaultLowering(Function &F, GCStrategy &S); public: static char ID; @@ -61,7 +61,7 @@ class GCMachineCodeAnalysis : public MachineFunctionPass { const TargetInstrInfo *TII; void FindSafePoints(MachineFunction &MF); - void VisitCallPoint(MachineBasicBlock::iterator MI); + void VisitCallPoint(MachineBasicBlock::iterator CI); MCSymbol *InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL) const; diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 114c068749eb..07de31bec660 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -38,6 +38,9 @@ bool CallLowering::lowerCall( ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}, i < NumFixedArgs}; setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS); + // We don't currently support swifterror or swiftself args. + if (OrigArg.Flags.isSwiftError() || OrigArg.Flags.isSwiftSelf()) + return false; OrigArgs.push_back(OrigArg); ++i; } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp new file mode 100644 index 000000000000..0bc5b87de150 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp @@ -0,0 +1,81 @@ +//===-- lib/CodeGen/GlobalISel/GICombiner.cpp -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file constains common code to combine machine functions at generic +// level. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/Combiner.h" +#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/GlobalISel/GISelWorkList.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "gi-combiner" + +using namespace llvm; + +Combiner::Combiner(CombinerInfo &Info, const TargetPassConfig *TPC) + : CInfo(Info), TPC(TPC) { + (void)this->TPC; // FIXME: Remove when used. +} + +bool Combiner::combineMachineInstrs(MachineFunction &MF) { + // If the ISel pipeline failed, do not bother running this pass. + // FIXME: Should this be here or in individual combiner passes. + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + + MRI = &MF.getRegInfo(); + Builder.setMF(MF); + + LLVM_DEBUG(dbgs() << "Generic MI Combiner for: " << MF.getName() << '\n'); + + MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); + + bool MFChanged = false; + bool Changed; + + do { + // Collect all instructions. Do a post order traversal for basic blocks and + // insert with list bottom up, so while we pop_back_val, we'll traverse top + // down RPOT. 
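The worklist ordering described in the Combiner comment just above is easy to check in isolation: pushing blocks in post order and each block's instructions bottom-up means popping from the back of the list walks the function top-down in reverse post order. A minimal sketch with strings standing in for instructions, using a plain std::vector rather than the GISelWorkList API:

#include <cstdio>
#include <string>
#include <vector>

int main() {
  // Two "blocks" already in reverse post order: entry {A1, A2}, exit {B1, B2}.
  std::vector<std::vector<std::string>> RPOBlocks = {{"A1", "A2"},
                                                     {"B1", "B2"}};

  std::vector<std::string> WorkList;
  // Post order of blocks is the reverse of RPO for this straight-line CFG;
  // within each block, push the instructions bottom-up.
  for (auto BlockIt = RPOBlocks.rbegin(); BlockIt != RPOBlocks.rend(); ++BlockIt)
    for (auto InstIt = BlockIt->rbegin(); InstIt != BlockIt->rend(); ++InstIt)
      WorkList.push_back(*InstIt);

  // Popping from the back now visits A1 A2 B1 B2, i.e. top-down in RPO.
  while (!WorkList.empty()) {
    std::printf("%s ", WorkList.back().c_str());
    WorkList.pop_back();
  }
  std::printf("\n");
  return 0;
}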
+ Changed = false; + GISelWorkList<512> WorkList; + for (MachineBasicBlock *MBB : post_order(&MF)) { + if (MBB->empty()) + continue; + for (auto MII = MBB->rbegin(), MIE = MBB->rend(); MII != MIE;) { + MachineInstr *CurMI = &*MII; + ++MII; + // Erase dead insts before even adding to the list. + if (isTriviallyDead(*CurMI, *MRI)) { + LLVM_DEBUG(dbgs() << *CurMI << "Is dead; erasing.\n"); + CurMI->eraseFromParentAndMarkDBGValuesForRemoval(); + continue; + } + WorkList.insert(CurMI); + } + } + // Main Loop. Process the instructions here. + while (!WorkList.empty()) { + MachineInstr *CurrInst = WorkList.pop_back_val(); + LLVM_DEBUG(dbgs() << "Try combining " << *CurrInst << "\n";); + Changed |= CInfo.combine(*CurrInst, Builder); + } + MFChanged |= Changed; + } while (Changed); + + return MFChanged; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp new file mode 100644 index 000000000000..44e904a6391b --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -0,0 +1,41 @@ +//== ---lib/CodeGen/GlobalISel/GICombinerHelper.cpp --------------------- == // +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +#define DEBUG_TYPE "gi-combine" + +using namespace llvm; + +CombinerHelper::CombinerHelper(MachineIRBuilder &B) : + Builder(B), MRI(Builder.getMF().getRegInfo()) {} + +bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { + if (MI.getOpcode() != TargetOpcode::COPY) + return false; + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + // Simple Copy Propagation. + // a(sx) = COPY b(sx) -> Replace all uses of a with b. 
+ if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) { + MI.eraseFromParent(); + MRI.replaceRegWith(DstReg, SrcReg); + return true; + } + return false; +} + +bool CombinerHelper::tryCombine(MachineInstr &MI) { + return tryCombineCopy(MI); +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index a329a71e2c95..bafb7a05536d 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -102,37 +103,103 @@ IRTranslator::IRTranslator() : MachineFunctionPass(ID) { } void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<StackProtector>(); AU.addRequired<TargetPassConfig>(); + getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } -unsigned IRTranslator::getOrCreateVReg(const Value &Val) { - unsigned &ValReg = ValToVReg[&Val]; +static void computeValueLLTs(const DataLayout &DL, Type &Ty, + SmallVectorImpl<LLT> &ValueTys, + SmallVectorImpl<uint64_t> *Offsets = nullptr, + uint64_t StartingOffset = 0) { + // Given a struct type, recursively traverse the elements. + if (StructType *STy = dyn_cast<StructType>(&Ty)) { + const StructLayout *SL = DL.getStructLayout(STy); + for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I) + computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets, + StartingOffset + SL->getElementOffset(I)); + return; + } + // Given an array type, recursively traverse the elements. + if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) { + Type *EltTy = ATy->getElementType(); + uint64_t EltSize = DL.getTypeAllocSize(EltTy); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + computeValueLLTs(DL, *EltTy, ValueTys, Offsets, + StartingOffset + i * EltSize); + return; + } + // Interpret void as zero return values. + if (Ty.isVoidTy()) + return; + // Base case: we can get an LLT for this LLVM IR type. + ValueTys.push_back(getLLTForType(Ty, DL)); + if (Offsets != nullptr) + Offsets->push_back(StartingOffset * 8); +} + +IRTranslator::ValueToVRegInfo::VRegListT & +IRTranslator::allocateVRegs(const Value &Val) { + assert(!VMap.contains(Val) && "Value already allocated in VMap"); + auto *Regs = VMap.getVRegs(Val); + auto *Offsets = VMap.getOffsets(Val); + SmallVector<LLT, 4> SplitTys; + computeValueLLTs(*DL, *Val.getType(), SplitTys, + Offsets->empty() ? Offsets : nullptr); + for (unsigned i = 0; i < SplitTys.size(); ++i) + Regs->push_back(0); + return *Regs; +} + +ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) { + auto VRegsIt = VMap.findVRegs(Val); + if (VRegsIt != VMap.vregs_end()) + return *VRegsIt->second; - if (ValReg) - return ValReg; + if (Val.getType()->isVoidTy()) + return *VMap.getVRegs(Val); + + // Create entry for this type. + auto *VRegs = VMap.getVRegs(Val); + auto *Offsets = VMap.getOffsets(Val); - // Fill ValRegsSequence with the sequence of registers - // we need to concat together to produce the value. 
assert(Val.getType()->isSized() && "Don't know how to create an empty vreg"); - unsigned VReg = - MRI->createGenericVirtualRegister(getLLTForType(*Val.getType(), *DL)); - ValReg = VReg; - if (auto CV = dyn_cast<Constant>(&Val)) { - bool Success = translate(*CV, VReg); + SmallVector<LLT, 4> SplitTys; + computeValueLLTs(*DL, *Val.getType(), SplitTys, + Offsets->empty() ? Offsets : nullptr); + + if (!isa<Constant>(Val)) { + for (auto Ty : SplitTys) + VRegs->push_back(MRI->createGenericVirtualRegister(Ty)); + return *VRegs; + } + + if (Val.getType()->isAggregateType()) { + // UndefValue, ConstantAggregateZero + auto &C = cast<Constant>(Val); + unsigned Idx = 0; + while (auto Elt = C.getAggregateElement(Idx++)) { + auto EltRegs = getOrCreateVRegs(*Elt); + std::copy(EltRegs.begin(), EltRegs.end(), std::back_inserter(*VRegs)); + } + } else { + assert(SplitTys.size() == 1 && "unexpectedly split LLT"); + VRegs->push_back(MRI->createGenericVirtualRegister(SplitTys[0])); + bool Success = translate(cast<Constant>(Val), VRegs->front()); if (!Success) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", MF->getFunction().getSubprogram(), &MF->getFunction().getEntryBlock()); R << "unable to translate constant: " << ore::NV("Type", Val.getType()); reportTranslationError(*MF, *TPC, *ORE, R); - return VReg; + return *VRegs; } } - return VReg; + return *VRegs; } int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) { @@ -164,6 +231,20 @@ unsigned IRTranslator::getMemOpAlignment(const Instruction &I) { } else if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) { Alignment = LI->getAlignment(); ValTy = LI->getType(); + } else if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) { + // TODO(PR27168): This instruction has no alignment attribute, but unlike + // the default alignment for load/store, the default here is to assume + // it has NATURAL alignment, not DataLayout-specified alignment. + const DataLayout &DL = AI->getModule()->getDataLayout(); + Alignment = DL.getTypeStoreSize(AI->getCompareOperand()->getType()); + ValTy = AI->getCompareOperand()->getType(); + } else if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) { + // TODO(PR27168): This instruction has no alignment attribute, but unlike + // the default alignment for load/store, the default here is to assume + // it has NATURAL alignment, not DataLayout-specified alignment. + const DataLayout &DL = AI->getModule()->getDataLayout(); + Alignment = DL.getTypeStoreSize(AI->getValOperand()->getType()); + ValTy = AI->getType(); } else { OptimizationRemarkMissed R("gisel-irtranslator", "", &I); R << "unable to translate memop: " << ore::NV("Opcode", &I); @@ -243,7 +324,11 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) { // The target may mess up with the insertion point, but // this is not important as a return is the last instruction // of the block anyway. - return CLI->lowerReturn(MIRBuilder, Ret, !Ret ? 0 : getOrCreateVReg(*Ret)); + + // FIXME: this interface should simplify when CallLowering gets adapted to + // multiple VRegs per Value. + unsigned VReg = Ret ? 
packRegs(*Ret, MIRBuilder) : 0; + return CLI->lowerReturn(MIRBuilder, Ret, VReg); } bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) { @@ -342,15 +427,23 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { if (DL->getTypeStoreSize(LI.getType()) == 0) return true; - unsigned Res = getOrCreateVReg(LI); - unsigned Addr = getOrCreateVReg(*LI.getPointerOperand()); + ArrayRef<unsigned> Regs = getOrCreateVRegs(LI); + ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI); + unsigned Base = getOrCreateVReg(*LI.getPointerOperand()); + + for (unsigned i = 0; i < Regs.size(); ++i) { + unsigned Addr = 0; + MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8); + + MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8); + unsigned BaseAlign = getMemOpAlignment(LI); + auto MMO = MF->getMachineMemOperand( + Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8, + MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr, + LI.getSyncScopeID(), LI.getOrdering()); + MIRBuilder.buildLoad(Regs[i], Addr, *MMO); + } - MIRBuilder.buildLoad( - Res, Addr, - *MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()), - Flags, DL->getTypeStoreSize(LI.getType()), - getMemOpAlignment(LI), AAMDNodes(), nullptr, - LI.getSyncScopeID(), LI.getOrdering())); return true; } @@ -363,50 +456,61 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0) return true; - unsigned Val = getOrCreateVReg(*SI.getValueOperand()); - unsigned Addr = getOrCreateVReg(*SI.getPointerOperand()); - - MIRBuilder.buildStore( - Val, Addr, - *MF->getMachineMemOperand( - MachinePointerInfo(SI.getPointerOperand()), Flags, - DL->getTypeStoreSize(SI.getValueOperand()->getType()), - getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSyncScopeID(), - SI.getOrdering())); + ArrayRef<unsigned> Vals = getOrCreateVRegs(*SI.getValueOperand()); + ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand()); + unsigned Base = getOrCreateVReg(*SI.getPointerOperand()); + + for (unsigned i = 0; i < Vals.size(); ++i) { + unsigned Addr = 0; + MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8); + + MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8); + unsigned BaseAlign = getMemOpAlignment(SI); + auto MMO = MF->getMachineMemOperand( + Ptr, Flags, (MRI->getType(Vals[i]).getSizeInBits() + 7) / 8, + MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr, + SI.getSyncScopeID(), SI.getOrdering()); + MIRBuilder.buildStore(Vals[i], Addr, *MMO); + } return true; } -bool IRTranslator::translateExtractValue(const User &U, - MachineIRBuilder &MIRBuilder) { +static uint64_t getOffsetFromIndices(const User &U, const DataLayout &DL) { const Value *Src = U.getOperand(0); Type *Int32Ty = Type::getInt32Ty(U.getContext()); - SmallVector<Value *, 1> Indices; - - // If Src is a single element ConstantStruct, translate extractvalue - // to that element to avoid inserting a cast instruction. - if (auto CS = dyn_cast<ConstantStruct>(Src)) - if (CS->getNumOperands() == 1) { - unsigned Res = getOrCreateVReg(*CS->getOperand(0)); - ValToVReg[&U] = Res; - return true; - } // getIndexedOffsetInType is designed for GEPs, so the first index is the // usual array element rather than looking into the actual aggregate. 
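The new getOrCreateVRegs/computeValueLLTs path above assigns one virtual register per scalar leaf of an aggregate value and records each leaf's offset in bits; translateLoad and translateStore then address each piece at Offsets[i] / 8. A standalone sketch of that flattening, assuming a packed toy Type with no padding (so no DataLayout or StructLayout is involved):

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

struct Type {
  uint64_t SizeInBytes = 0;   // Leaf size in bytes; ignored for aggregates.
  std::vector<Type> Elements; // Non-empty for struct-like aggregates.
  bool isAggregate() const { return !Elements.empty(); }
};

static uint64_t sizeOf(const Type &Ty) {
  if (!Ty.isAggregate())
    return Ty.SizeInBytes;
  uint64_t Size = 0;
  for (const Type &Elt : Ty.Elements)
    Size += sizeOf(Elt); // Packed layout: no padding between elements.
  return Size;
}

// Collect one (leaf size in bytes, offset in bits) pair per scalar leaf, in
// source order, much like computeValueLLTs collects LLTs and bit offsets.
static void flatten(const Type &Ty, uint64_t StartingOffset,
                    std::vector<std::pair<uint64_t, uint64_t>> &Leaves) {
  if (Ty.isAggregate()) {
    uint64_t Offset = StartingOffset;
    for (const Type &Elt : Ty.Elements) {
      flatten(Elt, Offset, Leaves);
      Offset += sizeOf(Elt);
    }
    return;
  }
  Leaves.push_back({Ty.SizeInBytes, StartingOffset * 8});
}

int main() {
  // { i32, { i64, i16 } }
  Type Ty{0, {Type{4}, Type{0, {Type{8}, Type{2}}}}};
  std::vector<std::pair<uint64_t, uint64_t>> Leaves;
  flatten(Ty, 0, Leaves);
  for (auto &L : Leaves)
    std::printf("leaf of %llu bytes at bit offset %llu\n",
                (unsigned long long)L.first, (unsigned long long)L.second);
  // Prints offsets 0, 32 and 96; one vreg would be created per leaf.
  return 0;
}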
+ SmallVector<Value *, 1> Indices; Indices.push_back(ConstantInt::get(Int32Ty, 0)); if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) { for (auto Idx : EVI->indices()) Indices.push_back(ConstantInt::get(Int32Ty, Idx)); + } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) { + for (auto Idx : IVI->indices()) + Indices.push_back(ConstantInt::get(Int32Ty, Idx)); } else { for (unsigned i = 1; i < U.getNumOperands(); ++i) Indices.push_back(U.getOperand(i)); } - uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices); + return 8 * static_cast<uint64_t>( + DL.getIndexedOffsetInType(Src->getType(), Indices)); +} - unsigned Res = getOrCreateVReg(U); - MIRBuilder.buildExtract(Res, getOrCreateVReg(*Src), Offset); +bool IRTranslator::translateExtractValue(const User &U, + MachineIRBuilder &MIRBuilder) { + const Value *Src = U.getOperand(0); + uint64_t Offset = getOffsetFromIndices(U, *DL); + ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src); + ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src); + unsigned Idx = std::lower_bound(Offsets.begin(), Offsets.end(), Offset) - + Offsets.begin(); + auto &DstRegs = allocateVRegs(U); + + for (unsigned i = 0; i < DstRegs.size(); ++i) + DstRegs[i] = SrcRegs[Idx++]; return true; } @@ -414,37 +518,33 @@ bool IRTranslator::translateExtractValue(const User &U, bool IRTranslator::translateInsertValue(const User &U, MachineIRBuilder &MIRBuilder) { const Value *Src = U.getOperand(0); - Type *Int32Ty = Type::getInt32Ty(U.getContext()); - SmallVector<Value *, 1> Indices; - - // getIndexedOffsetInType is designed for GEPs, so the first index is the - // usual array element rather than looking into the actual aggregate. - Indices.push_back(ConstantInt::get(Int32Ty, 0)); - - if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) { - for (auto Idx : IVI->indices()) - Indices.push_back(ConstantInt::get(Int32Ty, Idx)); - } else { - for (unsigned i = 2; i < U.getNumOperands(); ++i) - Indices.push_back(U.getOperand(i)); + uint64_t Offset = getOffsetFromIndices(U, *DL); + auto &DstRegs = allocateVRegs(U); + ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U); + ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src); + ArrayRef<unsigned> InsertedRegs = getOrCreateVRegs(*U.getOperand(1)); + auto InsertedIt = InsertedRegs.begin(); + + for (unsigned i = 0; i < DstRegs.size(); ++i) { + if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end()) + DstRegs[i] = *InsertedIt++; + else + DstRegs[i] = SrcRegs[i]; } - uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices); - - unsigned Res = getOrCreateVReg(U); - unsigned Inserted = getOrCreateVReg(*U.getOperand(1)); - MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), Inserted, Offset); - return true; } bool IRTranslator::translateSelect(const User &U, MachineIRBuilder &MIRBuilder) { - unsigned Res = getOrCreateVReg(U); unsigned Tst = getOrCreateVReg(*U.getOperand(0)); - unsigned Op0 = getOrCreateVReg(*U.getOperand(1)); - unsigned Op1 = getOrCreateVReg(*U.getOperand(2)); - MIRBuilder.buildSelect(Res, Tst, Op0, Op1); + ArrayRef<unsigned> ResRegs = getOrCreateVRegs(U); + ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1)); + ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2)); + + for (unsigned i = 0; i < ResRegs.size(); ++i) + MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]); + return true; } @@ -453,15 +553,16 @@ bool IRTranslator::translateBitCast(const User &U, // If we're bitcasting to the source type, we can reuse 
the source vreg. if (getLLTForType(*U.getOperand(0)->getType(), *DL) == getLLTForType(*U.getType(), *DL)) { - // Get the source vreg now, to avoid invalidating ValToVReg. unsigned SrcReg = getOrCreateVReg(*U.getOperand(0)); - unsigned &Reg = ValToVReg[&U]; + auto &Regs = *VMap.getVRegs(U); // If we already assigned a vreg for this bitcast, we can't change that. // Emit a copy to satisfy the users we already emitted. - if (Reg) - MIRBuilder.buildCopy(Reg, SrcReg); - else - Reg = SrcReg; + if (!Regs.empty()) + MIRBuilder.buildCopy(Regs[0], SrcReg); + else { + Regs.push_back(SrcReg); + VMap.getOffsets(U)->push_back(0); + } return true; } return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder); @@ -516,10 +617,6 @@ bool IRTranslator::translateGetElementPtr(const User &U, Offset = 0; } - // N = N + Idx * ElementSize; - unsigned ElementSizeReg = - getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize)); - unsigned IdxReg = getOrCreateVReg(*Idx); if (MRI->getType(IdxReg) != OffsetTy) { unsigned NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy); @@ -527,11 +624,20 @@ bool IRTranslator::translateGetElementPtr(const User &U, IdxReg = NewIdxReg; } - unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy); - MIRBuilder.buildMul(OffsetReg, ElementSizeReg, IdxReg); + // N = N + Idx * ElementSize; + // Avoid doing it for ElementSize of 1. + unsigned GepOffsetReg; + if (ElementSize != 1) { + unsigned ElementSizeReg = + getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize)); + + GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy); + MIRBuilder.buildMul(GepOffsetReg, ElementSizeReg, IdxReg); + } else + GepOffsetReg = IdxReg; unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg); + MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg); BaseReg = NewBaseReg; } } @@ -607,14 +713,10 @@ void IRTranslator::getStackGuard(unsigned DstReg, bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op, MachineIRBuilder &MIRBuilder) { - LLT Ty = getLLTForType(*CI.getOperand(0)->getType(), *DL); - LLT s1 = LLT::scalar(1); - unsigned Width = Ty.getSizeInBits(); - unsigned Res = MRI->createGenericVirtualRegister(Ty); - unsigned Overflow = MRI->createGenericVirtualRegister(s1); + ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI); auto MIB = MIRBuilder.buildInstr(Op) - .addDef(Res) - .addDef(Overflow) + .addDef(ResRegs[0]) + .addDef(ResRegs[1]) .addUse(getOrCreateVReg(*CI.getOperand(0))) .addUse(getOrCreateVReg(*CI.getOperand(1))); @@ -624,7 +726,6 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op, MIB.addUse(Zero); } - MIRBuilder.buildSequence(getOrCreateVReg(CI), {Res, Overflow}, {0, Width}); return true; } @@ -647,7 +748,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, const Value *Address = DI.getAddress(); if (!Address || isa<UndefValue>(Address)) { - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return true; } @@ -741,6 +842,11 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, .addDef(getOrCreateVReg(CI)) .addUse(getOrCreateVReg(*CI.getArgOperand(0))); return true; + case Intrinsic::fabs: + MIRBuilder.buildInstr(TargetOpcode::G_FABS) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; case Intrinsic::fma: MIRBuilder.buildInstr(TargetOpcode::G_FMA) .addDef(getOrCreateVReg(CI)) @@ 
-748,6 +854,25 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, .addUse(getOrCreateVReg(*CI.getArgOperand(1))) .addUse(getOrCreateVReg(*CI.getArgOperand(2))); return true; + case Intrinsic::fmuladd: { + const TargetMachine &TM = MF->getTarget(); + const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); + unsigned Dst = getOrCreateVReg(CI); + unsigned Op0 = getOrCreateVReg(*CI.getArgOperand(0)); + unsigned Op1 = getOrCreateVReg(*CI.getArgOperand(1)); + unsigned Op2 = getOrCreateVReg(*CI.getArgOperand(2)); + if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && + TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) { + // TODO: Revisit this to see if we should move this part of the + // lowering to the combiner. + MIRBuilder.buildInstr(TargetOpcode::G_FMA, Dst, Op0, Op1, Op2); + } else { + LLT Ty = getLLTForType(*CI.getType(), *DL); + auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, Ty, Op0, Op1); + MIRBuilder.buildInstr(TargetOpcode::G_FADD, Dst, FMul, Op2); + } + return true; + } case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: @@ -807,6 +932,34 @@ bool IRTranslator::translateInlineAsm(const CallInst &CI, return true; } +unsigned IRTranslator::packRegs(const Value &V, + MachineIRBuilder &MIRBuilder) { + ArrayRef<unsigned> Regs = getOrCreateVRegs(V); + ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V); + LLT BigTy = getLLTForType(*V.getType(), *DL); + + if (Regs.size() == 1) + return Regs[0]; + + unsigned Dst = MRI->createGenericVirtualRegister(BigTy); + MIRBuilder.buildUndef(Dst); + for (unsigned i = 0; i < Regs.size(); ++i) { + unsigned NewDst = MRI->createGenericVirtualRegister(BigTy); + MIRBuilder.buildInsert(NewDst, Dst, Regs[i], Offsets[i]); + Dst = NewDst; + } + return Dst; +} + +void IRTranslator::unpackRegs(const Value &V, unsigned Src, + MachineIRBuilder &MIRBuilder) { + ArrayRef<unsigned> Regs = getOrCreateVRegs(V); + ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V); + + for (unsigned i = 0; i < Regs.size(); ++i) + MIRBuilder.buildExtract(Regs[i], Src, Offsets[i]); +} + bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { const CallInst &CI = cast<CallInst>(U); auto TII = MF->getTarget().getIntrinsicInfo(); @@ -826,16 +979,24 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F)); } + bool IsSplitType = valueIsSplit(CI); if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) { - unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI); + unsigned Res = IsSplitType ? MRI->createGenericVirtualRegister( + getLLTForType(*CI.getType(), *DL)) + : getOrCreateVReg(CI); + SmallVector<unsigned, 8> Args; for (auto &Arg: CI.arg_operands()) - Args.push_back(getOrCreateVReg(*Arg)); + Args.push_back(packRegs(*Arg, MIRBuilder)); MF->getFrameInfo().setHasCalls(true); - return CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() { + bool Success = CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() { return getOrCreateVReg(*CI.getCalledValue()); }); + + if (IsSplitType) + unpackRegs(CI, Res, MIRBuilder); + return Success; } assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic"); @@ -843,7 +1004,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { if (translateKnownIntrinsic(CI, ID, MIRBuilder)) return true; - unsigned Res = CI.getType()->isVoidTy() ? 
0 : getOrCreateVReg(CI); + unsigned Res = 0; + if (!CI.getType()->isVoidTy()) { + if (IsSplitType) + Res = + MRI->createGenericVirtualRegister(getLLTForType(*CI.getType(), *DL)); + else + Res = getOrCreateVReg(CI); + } MachineInstrBuilder MIB = MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory()); @@ -851,9 +1019,12 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { // Some intrinsics take metadata parameters. Reject them. if (isa<MetadataAsValue>(Arg)) return false; - MIB.addUse(getOrCreateVReg(*Arg)); + MIB.addUse(packRegs(*Arg, MIRBuilder)); } + if (IsSplitType) + unpackRegs(CI, Res, MIRBuilder); + // Add a MachineMemOperand if it is a target mem intrinsic. const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); TargetLowering::IntrinsicInfo Info; @@ -897,15 +1068,18 @@ bool IRTranslator::translateInvoke(const User &U, MCSymbol *BeginSymbol = Context.createTempSymbol(); MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol); - unsigned Res = I.getType()->isVoidTy() ? 0 : getOrCreateVReg(I); + unsigned Res = + MRI->createGenericVirtualRegister(getLLTForType(*I.getType(), *DL)); SmallVector<unsigned, 8> Args; for (auto &Arg: I.arg_operands()) - Args.push_back(getOrCreateVReg(*Arg)); + Args.push_back(packRegs(*Arg, MIRBuilder)); if (!CLI->lowerCall(MIRBuilder, &I, Res, Args, [&]() { return getOrCreateVReg(*I.getCalledValue()); })) return false; + unpackRegs(I, Res, MIRBuilder); + MCSymbol *EndSymbol = Context.createTempSymbol(); MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol); @@ -964,27 +1138,18 @@ bool IRTranslator::translateLandingPad(const User &U, return false; MBB.addLiveIn(ExceptionReg); - unsigned VReg = MRI->createGenericVirtualRegister(Tys[0]), - Tmp = MRI->createGenericVirtualRegister(Ty); - MIRBuilder.buildCopy(VReg, ExceptionReg); - MIRBuilder.buildInsert(Tmp, Undef, VReg, 0); + ArrayRef<unsigned> ResRegs = getOrCreateVRegs(LP); + MIRBuilder.buildCopy(ResRegs[0], ExceptionReg); unsigned SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn); if (!SelectorReg) return false; MBB.addLiveIn(SelectorReg); - - // N.b. the exception selector register always has pointer type and may not - // match the actual IR-level type in the landingpad so an extra cast is - // needed. unsigned PtrVReg = MRI->createGenericVirtualRegister(Tys[0]); MIRBuilder.buildCopy(PtrVReg, SelectorReg); + MIRBuilder.buildCast(ResRegs[1], PtrVReg); - VReg = MRI->createGenericVirtualRegister(Tys[1]); - MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT).addDef(VReg).addUse(PtrVReg); - MIRBuilder.buildInsert(getOrCreateVReg(LP), Tmp, VReg, - Tys[0].getSizeInBits()); return true; } @@ -992,6 +1157,9 @@ bool IRTranslator::translateAlloca(const User &U, MachineIRBuilder &MIRBuilder) { auto &AI = cast<AllocaInst>(U); + if (AI.isSwiftError()) + return false; + if (AI.isStaticAlloca()) { unsigned Res = getOrCreateVReg(AI); int FI = getOrCreateFrameIndex(AI); @@ -999,6 +1167,10 @@ bool IRTranslator::translateAlloca(const User &U, return true; } + // FIXME: support stack probing for Windows. + if (MF->getTarget().getTargetTriple().isOSWindows()) + return false; + // Now we're in the harder dynamic case. Type *Ty = AI.getAllocatedType(); unsigned Align = @@ -1070,9 +1242,16 @@ bool IRTranslator::translateInsertElement(const User &U, // not a legal vector type in LLT. 
if (U.getType()->getVectorNumElements() == 1) { unsigned Elt = getOrCreateVReg(*U.getOperand(1)); - ValToVReg[&U] = Elt; + auto &Regs = *VMap.getVRegs(U); + if (Regs.empty()) { + Regs.push_back(Elt); + VMap.getOffsets(U)->push_back(0); + } else { + MIRBuilder.buildCopy(Regs[0], Elt); + } return true; } + unsigned Res = getOrCreateVReg(U); unsigned Val = getOrCreateVReg(*U.getOperand(0)); unsigned Elt = getOrCreateVReg(*U.getOperand(1)); @@ -1087,7 +1266,13 @@ bool IRTranslator::translateExtractElement(const User &U, // not a legal vector type in LLT. if (U.getOperand(0)->getType()->getVectorNumElements() == 1) { unsigned Elt = getOrCreateVReg(*U.getOperand(0)); - ValToVReg[&U] = Elt; + auto &Regs = *VMap.getVRegs(U); + if (Regs.empty()) { + Regs.push_back(Elt); + VMap.getOffsets(U)->push_back(0); + } else { + MIRBuilder.buildCopy(Regs[0], Elt); + } return true; } unsigned Res = getOrCreateVReg(U); @@ -1109,17 +1294,115 @@ bool IRTranslator::translateShuffleVector(const User &U, bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) { const PHINode &PI = cast<PHINode>(U); - auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI); - MIB.addDef(getOrCreateVReg(PI)); - PendingPHIs.emplace_back(&PI, MIB.getInstr()); + SmallVector<MachineInstr *, 4> Insts; + for (auto Reg : getOrCreateVRegs(PI)) { + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, Reg); + Insts.push_back(MIB.getInstr()); + } + + PendingPHIs.emplace_back(&PI, std::move(Insts)); + return true; +} + +bool IRTranslator::translateAtomicCmpXchg(const User &U, + MachineIRBuilder &MIRBuilder) { + const AtomicCmpXchgInst &I = cast<AtomicCmpXchgInst>(U); + + if (I.isWeak()) + return false; + + auto Flags = I.isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone; + Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + + Type *ResType = I.getType(); + Type *ValType = ResType->Type::getStructElementType(0); + + auto Res = getOrCreateVRegs(I); + unsigned OldValRes = Res[0]; + unsigned SuccessRes = Res[1]; + unsigned Addr = getOrCreateVReg(*I.getPointerOperand()); + unsigned Cmp = getOrCreateVReg(*I.getCompareOperand()); + unsigned NewVal = getOrCreateVReg(*I.getNewValOperand()); + + MIRBuilder.buildAtomicCmpXchgWithSuccess( + OldValRes, SuccessRes, Addr, Cmp, NewVal, + *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), + Flags, DL->getTypeStoreSize(ValType), + getMemOpAlignment(I), AAMDNodes(), nullptr, + I.getSyncScopeID(), I.getSuccessOrdering(), + I.getFailureOrdering())); + return true; +} + +bool IRTranslator::translateAtomicRMW(const User &U, + MachineIRBuilder &MIRBuilder) { + const AtomicRMWInst &I = cast<AtomicRMWInst>(U); + + auto Flags = I.isVolatile() ? 
MachineMemOperand::MOVolatile + : MachineMemOperand::MONone; + Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + + Type *ResType = I.getType(); + + unsigned Res = getOrCreateVReg(I); + unsigned Addr = getOrCreateVReg(*I.getPointerOperand()); + unsigned Val = getOrCreateVReg(*I.getValOperand()); + + unsigned Opcode = 0; + switch (I.getOperation()) { + default: + llvm_unreachable("Unknown atomicrmw op"); + return false; + case AtomicRMWInst::Xchg: + Opcode = TargetOpcode::G_ATOMICRMW_XCHG; + break; + case AtomicRMWInst::Add: + Opcode = TargetOpcode::G_ATOMICRMW_ADD; + break; + case AtomicRMWInst::Sub: + Opcode = TargetOpcode::G_ATOMICRMW_SUB; + break; + case AtomicRMWInst::And: + Opcode = TargetOpcode::G_ATOMICRMW_AND; + break; + case AtomicRMWInst::Nand: + Opcode = TargetOpcode::G_ATOMICRMW_NAND; + break; + case AtomicRMWInst::Or: + Opcode = TargetOpcode::G_ATOMICRMW_OR; + break; + case AtomicRMWInst::Xor: + Opcode = TargetOpcode::G_ATOMICRMW_XOR; + break; + case AtomicRMWInst::Max: + Opcode = TargetOpcode::G_ATOMICRMW_MAX; + break; + case AtomicRMWInst::Min: + Opcode = TargetOpcode::G_ATOMICRMW_MIN; + break; + case AtomicRMWInst::UMax: + Opcode = TargetOpcode::G_ATOMICRMW_UMAX; + break; + case AtomicRMWInst::UMin: + Opcode = TargetOpcode::G_ATOMICRMW_UMIN; + break; + } + + MIRBuilder.buildAtomicRMW( + Opcode, Res, Addr, Val, + *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), + Flags, DL->getTypeStoreSize(ResType), + getMemOpAlignment(I), AAMDNodes(), nullptr, + I.getSyncScopeID(), I.getOrdering())); return true; } void IRTranslator::finishPendingPhis() { - for (std::pair<const PHINode *, MachineInstr *> &Phi : PendingPHIs) { + for (auto &Phi : PendingPHIs) { const PHINode *PI = Phi.first; - MachineInstrBuilder MIB(*MF, Phi.second); + ArrayRef<MachineInstr *> ComponentPHIs = Phi.second; // All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator // won't create extra control flow here, otherwise we need to find the @@ -1133,17 +1416,27 @@ void IRTranslator::finishPendingPhis() { continue; HandledPreds.insert(IRPred); - unsigned ValReg = getOrCreateVReg(*PI->getIncomingValue(i)); + ArrayRef<unsigned> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i)); for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) { - assert(Pred->isSuccessor(MIB->getParent()) && + assert(Pred->isSuccessor(ComponentPHIs[0]->getParent()) && "incorrect CFG at MachineBasicBlock level"); - MIB.addUse(ValReg); - MIB.addMBB(Pred); + for (unsigned j = 0; j < ValRegs.size(); ++j) { + MachineInstrBuilder MIB(*MF, ComponentPHIs[j]); + MIB.addUse(ValRegs[j]); + MIB.addMBB(Pred); + } } } } } +bool IRTranslator::valueIsSplit(const Value &V, + SmallVectorImpl<uint64_t> *Offsets) { + SmallVector<LLT, 4> SplitTys; + computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets); + return SplitTys.size() > 1; +} + bool IRTranslator::translate(const Instruction &Inst) { CurBuilder.setDebugLoc(Inst.getDebugLoc()); switch(Inst.getOpcode()) { @@ -1162,9 +1455,15 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) { EntryBuilder.buildFConstant(Reg, *CF); else if (isa<UndefValue>(C)) EntryBuilder.buildUndef(Reg); - else if (isa<ConstantPointerNull>(C)) - EntryBuilder.buildConstant(Reg, 0); - else if (auto GV = dyn_cast<GlobalValue>(&C)) + else if (isa<ConstantPointerNull>(C)) { + // As we are trying to build a constant val of 0 into a pointer, + // insert a cast to make them correct with respect to types. 
+ unsigned NullSize = DL->getTypeSizeInBits(C.getType()); + auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize); + auto *ZeroVal = ConstantInt::get(ZeroTy, 0); + unsigned ZeroReg = getOrCreateVReg(*ZeroVal); + EntryBuilder.buildCast(Reg, ZeroReg); + } else if (auto GV = dyn_cast<GlobalValue>(&C)) EntryBuilder.buildGlobalValue(Reg, GV); else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) { if (!CAZ->getType()->isVectorTy()) @@ -1196,23 +1495,6 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) { default: return false; } - } else if (auto CS = dyn_cast<ConstantStruct>(&C)) { - // Return the element if it is a single element ConstantStruct. - if (CS->getNumOperands() == 1) { - unsigned EltReg = getOrCreateVReg(*CS->getOperand(0)); - EntryBuilder.buildCast(Reg, EltReg); - return true; - } - SmallVector<unsigned, 4> Ops; - SmallVector<uint64_t, 4> Indices; - uint64_t Offset = 0; - for (unsigned i = 0; i < CS->getNumOperands(); ++i) { - unsigned OpReg = getOrCreateVReg(*CS->getOperand(i)); - Ops.push_back(OpReg); - Indices.push_back(Offset); - Offset += MRI->getType(OpReg).getSizeInBits(); - } - EntryBuilder.buildSequence(Reg, Ops, Indices); } else if (auto CV = dyn_cast<ConstantVector>(&C)) { if (CV->getNumOperands() == 1) return translate(*CV->getOperand(0), Reg); @@ -1231,7 +1513,7 @@ void IRTranslator::finalizeFunction() { // Release the memory used by the different maps we // needed during the translation. PendingPHIs.clear(); - ValToVReg.clear(); + VMap.reset(); FrameIndices.clear(); MachinePreds.clear(); // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it @@ -1291,8 +1573,22 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { for (const Argument &Arg: F.args()) { if (DL->getTypeStoreSize(Arg.getType()) == 0) continue; // Don't handle zero sized types. - VRegArgs.push_back(getOrCreateVReg(Arg)); + VRegArgs.push_back( + MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL))); } + + // We don't currently support translating swifterror or swiftself functions. + for (auto &Arg : F.args()) { + if (Arg.hasSwiftErrorAttr() || Arg.hasSwiftSelfAttr()) { + OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", + F.getSubprogram(), &F.getEntryBlock()); + R << "unable to lower arguments due to swifterror/swiftself: " + << ore::NV("Prototype", F.getType()); + reportTranslationError(*MF, *TPC, *ORE, R); + return false; + } + } + if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", F.getSubprogram(), &F.getEntryBlock()); @@ -1301,14 +1597,28 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { return false; } + auto ArgIt = F.arg_begin(); + for (auto &VArg : VRegArgs) { + // If the argument is an unsplit scalar then don't use unpackRegs to avoid + // creating redundant copies. + if (!valueIsSplit(*ArgIt, VMap.getOffsets(*ArgIt))) { + auto &VRegs = *VMap.getVRegs(cast<Value>(*ArgIt)); + assert(VRegs.empty() && "VRegs already populated?"); + VRegs.push_back(VArg); + } else { + unpackRegs(*ArgIt, VArg, EntryBuilder); + } + ArgIt++; + } + // And translate the function! - for (const BasicBlock &BB: F) { + for (const BasicBlock &BB : F) { MachineBasicBlock &MBB = getMBB(BB); // Set the insertion point of all the following translations to // the end of this basic block. 
CurBuilder.setMBB(MBB); - for (const Instruction &Inst: BB) { + for (const Instruction &Inst : BB) { if (translate(Inst)) continue; @@ -1358,5 +1668,9 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { assert(&MF->front() == &NewEntryBB && "New entry wasn't next in the list of basic block!"); + // Initialize stack protector information. + StackProtector &SP = getAnalysis<StackProtector>(); + SP.copyToMachineFrameInfo(MF->getFrameInfo()); + return false; } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 422cc2219aa8..c83c791327e4 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -12,7 +12,6 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" @@ -57,23 +56,17 @@ InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetPassConfig>(); + getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { - const MachineRegisterInfo &MRI = MF.getRegInfo(); - - // No matter what happens, whether we successfully select the function or not, - // nothing is going to use the vreg types after us. Make sure they disappear. - auto ClearVRegTypesOnReturn = - make_scope_exit([&]() { MRI.getVRegToType().clear(); }); - // If the ISel pipeline failed, do not bother running that pass. if (MF.getProperties().hasProperty( MachineFunctionProperties::Property::FailedISel)) return false; - DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n'); + LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n'); const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); @@ -85,23 +78,18 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { // FIXME: There are many other MF/MFI fields we need to initialize. + MachineRegisterInfo &MRI = MF.getRegInfo(); #ifndef NDEBUG // Check that our input is fully legal: we require the function to have the // Legalized property, so it should be. - // FIXME: This should be in the MachineVerifier, but it can't use the - // LegalizerInfo as it's currently in the separate GlobalISel library. - // The RegBankSelected property is already checked in the verifier. Note - // that it has the same layering problem, but we only use inline methods so - // end up not needing to link against the GlobalISel library. - if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) - for (MachineBasicBlock &MBB : MF) - for (MachineInstr &MI : MBB) - if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) { - reportGISelFailure(MF, TPC, MORE, "gisel-select", - "instruction is not legal", MI); - return false; - } - + // FIXME: This should be in the MachineVerifier, as the RegBankSelected + // property check already is. 
+ if (!DisableGISelLegalityCheck) + if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) { + reportGISelFailure(MF, TPC, MORE, "gisel-select", + "instruction is not legal", *MI); + return false; + } #endif // FIXME: We could introduce new blocks and will need to fix the outer loop. // Until then, keep track of the number of blocks to assert that we don't. @@ -129,12 +117,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { else --MII; - DEBUG(dbgs() << "Selecting: \n " << MI); + LLVM_DEBUG(dbgs() << "Selecting: \n " << MI); // We could have folded this instruction away already, making it dead. // If so, erase it. if (isTriviallyDead(MI, MRI)) { - DEBUG(dbgs() << "Is dead; erasing.\n"); + LLVM_DEBUG(dbgs() << "Is dead; erasing.\n"); MI.eraseFromParentAndMarkDBGValuesForRemoval(); continue; } @@ -147,7 +135,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { } // Dump the range of instructions that MI expanded into. - DEBUG({ + LLVM_DEBUG({ auto InsertedBegin = ReachedBegin ? MBB->begin() : std::next(MII); dbgs() << "Into:\n"; for (auto &InsertedMI : make_range(InsertedBegin, AfterIt)) @@ -159,30 +147,63 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + for (MachineBasicBlock &MBB : MF) { + if (MBB.empty()) + continue; + + // Try to find redundant copies b/w vregs of the same register class. + bool ReachedBegin = false; + for (auto MII = std::prev(MBB.end()), Begin = MBB.begin(); !ReachedBegin;) { + // Select this instruction. + MachineInstr &MI = *MII; + + // And have our iterator point to the next instruction, if there is one. + if (MII == Begin) + ReachedBegin = true; + else + --MII; + if (MI.getOpcode() != TargetOpcode::COPY) + continue; + unsigned SrcReg = MI.getOperand(1).getReg(); + unsigned DstReg = MI.getOperand(0).getReg(); + if (TargetRegisterInfo::isVirtualRegister(SrcReg) && + TargetRegisterInfo::isVirtualRegister(DstReg)) { + auto SrcRC = MRI.getRegClass(SrcReg); + auto DstRC = MRI.getRegClass(DstReg); + if (SrcRC == DstRC) { + MRI.replaceRegWith(DstReg, SrcReg); + MI.eraseFromParentAndMarkDBGValuesForRemoval(); + } + } + } + } + // Now that selection is complete, there are no more generic vregs. Verify // that the size of the now-constrained vreg is unchanged and that it has a // register class. 
- for (auto &VRegToType : MRI.getVRegToType()) { - unsigned VReg = VRegToType.first; - auto *RC = MRI.getRegClassOrNull(VReg); + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + unsigned VReg = TargetRegisterInfo::index2VirtReg(I); + MachineInstr *MI = nullptr; if (!MRI.def_empty(VReg)) MI = &*MRI.def_instr_begin(VReg); else if (!MRI.use_empty(VReg)) MI = &*MRI.use_instr_begin(VReg); + if (!MI) + continue; - if (MI && !RC) { + const TargetRegisterClass *RC = MRI.getRegClassOrNull(VReg); + if (!RC) { reportGISelFailure(MF, TPC, MORE, "gisel-select", "VReg has no regclass after selection", *MI); return false; - } else if (!RC) - continue; + } - if (VRegToType.second.isValid() && - VRegToType.second.getSizeInBits() > TRI.getRegSizeInBits(*RC)) { - reportGISelFailure(MF, TPC, MORE, "gisel-select", - "VReg has explicit size different from class size", - *MI); + const LLT Ty = MRI.getType(VReg); + if (Ty.isValid() && Ty.getSizeInBits() > TRI.getRegSizeInBits(*RC)) { + reportGISelFailure( + MF, TPC, MORE, "gisel-select", + "VReg's low-level type and register class have different sizes", *MI); return false; } } @@ -199,6 +220,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { auto &TLI = *MF.getSubtarget().getTargetLowering(); TLI.finalizeLowering(MF); + LLVM_DEBUG({ + dbgs() << "Rules covered by selecting function: " << MF.getName() << ":"; + for (auto RuleID : CoverageInfo.covered()) + dbgs() << " id" << RuleID; + dbgs() << "\n\n"; + }); CoverageInfo.emit(CoveragePrefix, MF.getSubtarget() .getTargetLowering() @@ -206,6 +233,11 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { .getTarget() .getBackendName()); + // If we successfully selected the function nothing is going to use the vreg + // types after us (otherwise MIRPrinter would need them). Make sure the types + // disappear. + MRI.clearVirtRegTypes(); + // FIXME: Should we accurately track changes? return true; } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 88669bd68c00..5e77fcbb0ed9 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -46,50 +46,6 @@ bool InstructionSelector::constrainOperandRegToRegClass( constrainRegToClass(MRI, TII, RBI, I, I.getOperand(OpIdx).getReg(), RC); } -bool InstructionSelector::constrainSelectedInstRegOperands( - MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const { - MachineBasicBlock &MBB = *I.getParent(); - MachineFunction &MF = *MBB.getParent(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - - for (unsigned OpI = 0, OpE = I.getNumExplicitOperands(); OpI != OpE; ++OpI) { - MachineOperand &MO = I.getOperand(OpI); - - // There's nothing to be done on non-register operands. - if (!MO.isReg()) - continue; - - DEBUG(dbgs() << "Converting operand: " << MO << '\n'); - assert(MO.isReg() && "Unsupported non-reg operand"); - - unsigned Reg = MO.getReg(); - // Physical registers don't need to be constrained. - if (TRI.isPhysicalRegister(Reg)) - continue; - - // Register operands with a value of 0 (e.g. predicate operands) don't need - // to be constrained. - if (Reg == 0) - continue; - - // If the operand is a vreg, we should constrain its regclass, and only - // insert COPYs if that's impossible. - // constrainOperandRegClass does that for us. 
- MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), - Reg, OpI)); - - // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been - // done. - if (MO.isUse()) { - int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO); - if (DefIdx != -1 && !I.isRegTiedToUseOperand(DefIdx)) - I.tieOperands(DefIdx, OpI); - } - } - return true; -} - bool InstructionSelector::isOperandImmEqual( const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const { diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp new file mode 100644 index 000000000000..344f573a67f5 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -0,0 +1,101 @@ +//===- lib/CodeGen/GlobalISel/LegalizerPredicates.cpp - Predicates --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A library of predicate factories to use for LegalityPredicate. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" + +using namespace llvm; + +LegalityPredicate LegalityPredicates::typeIs(unsigned TypeIdx, LLT Type) { + return + [=](const LegalityQuery &Query) { return Query.Types[TypeIdx] == Type; }; +} + +LegalityPredicate +LegalityPredicates::typeInSet(unsigned TypeIdx, + std::initializer_list<LLT> TypesInit) { + SmallVector<LLT, 4> Types = TypesInit; + return [=](const LegalityQuery &Query) { + return std::find(Types.begin(), Types.end(), Query.Types[TypeIdx]) != Types.end(); + }; +} + +LegalityPredicate LegalityPredicates::typePairInSet( + unsigned TypeIdx0, unsigned TypeIdx1, + std::initializer_list<std::pair<LLT, LLT>> TypesInit) { + SmallVector<std::pair<LLT, LLT>, 4> Types = TypesInit; + return [=](const LegalityQuery &Query) { + std::pair<LLT, LLT> Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1]}; + return std::find(Types.begin(), Types.end(), Match) != Types.end(); + }; +} + +LegalityPredicate LegalityPredicates::typePairAndMemSizeInSet( + unsigned TypeIdx0, unsigned TypeIdx1, unsigned MMOIdx, + std::initializer_list<TypePairAndMemSize> TypesAndMemSizeInit) { + SmallVector<TypePairAndMemSize, 4> TypesAndMemSize = TypesAndMemSizeInit; + return [=](const LegalityQuery &Query) { + TypePairAndMemSize Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1], + Query.MMODescrs[MMOIdx].Size}; + return std::find(TypesAndMemSize.begin(), TypesAndMemSize.end(), Match) != + TypesAndMemSize.end(); + }; +} + +LegalityPredicate LegalityPredicates::isScalar(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx].isScalar(); + }; +} + +LegalityPredicate LegalityPredicates::narrowerThan(unsigned TypeIdx, + unsigned Size) { + return [=](const LegalityQuery &Query) { + const LLT &QueryTy = Query.Types[TypeIdx]; + return QueryTy.isScalar() && QueryTy.getSizeInBits() < Size; + }; +} + +LegalityPredicate LegalityPredicates::widerThan(unsigned TypeIdx, + unsigned Size) { + return [=](const LegalityQuery &Query) { + const LLT &QueryTy = Query.Types[TypeIdx]; + return QueryTy.isScalar() && QueryTy.getSizeInBits() > Size; + }; +} + +LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT &QueryTy = 
Query.Types[TypeIdx]; + return QueryTy.isScalar() && !isPowerOf2_32(QueryTy.getSizeInBits()); + }; +} + +LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) { + return [=](const LegalityQuery &Query) { + return !isPowerOf2_32(Query.MMODescrs[MMOIdx].Size /* In Bytes */); + }; +} + +LegalityPredicate LegalityPredicates::numElementsNotPow2(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT &QueryTy = Query.Types[TypeIdx]; + return QueryTy.isVector() && isPowerOf2_32(QueryTy.getNumElements()); + }; +} + +LegalityPredicate LegalityPredicates::atomicOrderingAtLeastOrStrongerThan( + unsigned MMOIdx, AtomicOrdering Ordering) { + return [=](const LegalityQuery &Query) { + return isAtLeastOrStrongerThan(Query.MMODescrs[MMOIdx].Ordering, Ordering); + }; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp new file mode 100644 index 000000000000..a29b32ecdc03 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp @@ -0,0 +1,51 @@ +//===- lib/CodeGen/GlobalISel/LegalizerMutations.cpp - Mutations ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A library of mutation factories to use for LegalityMutation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" + +using namespace llvm; + +LegalizeMutation LegalizeMutations::changeTo(unsigned TypeIdx, LLT Ty) { + return + [=](const LegalityQuery &Query) { return std::make_pair(TypeIdx, Ty); }; +} + +LegalizeMutation LegalizeMutations::changeTo(unsigned TypeIdx, + unsigned FromTypeIdx) { + return [=](const LegalityQuery &Query) { + return std::make_pair(TypeIdx, Query.Types[FromTypeIdx]); + }; +} + +LegalizeMutation LegalizeMutations::widenScalarToNextPow2(unsigned TypeIdx, + unsigned Min) { + return [=](const LegalityQuery &Query) { + unsigned NewSizeInBits = + 1 << Log2_32_Ceil(Query.Types[TypeIdx].getSizeInBits()); + if (NewSizeInBits < Min) + NewSizeInBits = Min; + return std::make_pair(TypeIdx, LLT::scalar(NewSizeInBits)); + }; +} + +LegalizeMutation LegalizeMutations::moreElementsToNextPow2(unsigned TypeIdx, + unsigned Min) { + return [=](const LegalityQuery &Query) { + const LLT &VecTy = Query.Types[TypeIdx]; + unsigned NewNumElements = 1 << Log2_32_Ceil(VecTy.getNumElements()); + if (NewNumElements < Min) + NewNumElements = Min; + return std::make_pair( + TypeIdx, LLT::vector(NewNumElements, VecTy.getScalarSizeInBits())); + }; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index f09b0d9f11e7..9a2aac998a84 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -47,6 +47,7 @@ Legalizer::Legalizer() : MachineFunctionPass(ID) { void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetPassConfig>(); + getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -72,7 +73,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { if (MF.getProperties().hasProperty( MachineFunctionProperties::Property::FailedISel)) return false; - DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n'); + 
LLVM_DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n'); init(MF); const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); @@ -112,7 +113,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { else InstList.insert(MI); } - DEBUG(dbgs() << ".. .. New MI: " << *MI;); + LLVM_DEBUG(dbgs() << ".. .. New MI: " << *MI;); }); const LegalizerInfo &LInfo(Helper.getLegalizerInfo()); LegalizationArtifactCombiner ArtCombiner(Helper.MIRBuilder, MF.getRegInfo(), LInfo); @@ -127,7 +128,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { MachineInstr &MI = *InstList.pop_back_val(); assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); if (isTriviallyDead(MI, MRI)) { - DEBUG(dbgs() << MI << "Is dead; erasing.\n"); + LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n"); MI.eraseFromParentAndMarkDBGValuesForRemoval(); continue; } @@ -148,7 +149,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { MachineInstr &MI = *ArtifactList.pop_back_val(); assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); if (isTriviallyDead(MI, MRI)) { - DEBUG(dbgs() << MI << "Is dead; erasing.\n"); + LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n"); RemoveDeadInstFromLists(&MI); MI.eraseFromParentAndMarkDBGValuesForRemoval(); continue; @@ -156,7 +157,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { SmallVector<MachineInstr *, 4> DeadInstructions; if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions)) { for (auto *DeadMI : DeadInstructions) { - DEBUG(dbgs() << ".. Erasing Dead Instruction " << *DeadMI); + LLVM_DEBUG(dbgs() << ".. Erasing Dead Instruction " << *DeadMI); RemoveDeadInstFromLists(DeadMI); DeadMI->eraseFromParentAndMarkDBGValuesForRemoval(); } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 6bebe180fefd..87086af121b7 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -26,6 +26,7 @@ #define DEBUG_TYPE "legalizer" using namespace llvm; +using namespace LegalizeActions; LegalizerHelper::LegalizerHelper(MachineFunction &MF) : MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()) { @@ -34,34 +35,34 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF) LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { - DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); + LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); - auto Action = LI.getAction(MI, MRI); - switch (std::get<0>(Action)) { - case LegalizerInfo::Legal: - DEBUG(dbgs() << ".. Already legal\n"); + auto Step = LI.getAction(MI, MRI); + switch (Step.Action) { + case Legal: + LLVM_DEBUG(dbgs() << ".. Already legal\n"); return AlreadyLegal; - case LegalizerInfo::Libcall: - DEBUG(dbgs() << ".. Convert to libcall\n"); + case Libcall: + LLVM_DEBUG(dbgs() << ".. Convert to libcall\n"); return libcall(MI); - case LegalizerInfo::NarrowScalar: - DEBUG(dbgs() << ".. Narrow scalar\n"); - return narrowScalar(MI, std::get<1>(Action), std::get<2>(Action)); - case LegalizerInfo::WidenScalar: - DEBUG(dbgs() << ".. Widen scalar\n"); - return widenScalar(MI, std::get<1>(Action), std::get<2>(Action)); - case LegalizerInfo::Lower: - DEBUG(dbgs() << ".. Lower\n"); - return lower(MI, std::get<1>(Action), std::get<2>(Action)); - case LegalizerInfo::FewerElements: - DEBUG(dbgs() << ".. 
Reduce number of elements\n"); - return fewerElementsVector(MI, std::get<1>(Action), std::get<2>(Action)); - case LegalizerInfo::Custom: - DEBUG(dbgs() << ".. Custom legalization\n"); + case NarrowScalar: + LLVM_DEBUG(dbgs() << ".. Narrow scalar\n"); + return narrowScalar(MI, Step.TypeIdx, Step.NewType); + case WidenScalar: + LLVM_DEBUG(dbgs() << ".. Widen scalar\n"); + return widenScalar(MI, Step.TypeIdx, Step.NewType); + case Lower: + LLVM_DEBUG(dbgs() << ".. Lower\n"); + return lower(MI, Step.TypeIdx, Step.NewType); + case FewerElements: + LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n"); + return fewerElementsVector(MI, Step.TypeIdx, Step.NewType); + case Custom: + LLVM_DEBUG(dbgs() << ".. Custom legalization\n"); return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized : UnableToLegalize; default: - DEBUG(dbgs() << ".. Unable to legalize\n"); + LLVM_DEBUG(dbgs() << ".. Unable to legalize\n"); return UnableToLegalize; } } @@ -103,6 +104,9 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32; case TargetOpcode::G_FPOW: return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32; + case TargetOpcode::G_FMA: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32; } llvm_unreachable("Unknown libcall function"); } @@ -123,13 +127,47 @@ llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, return LegalizerHelper::Legalized; } +// Useful for libcalls where all operands have the same type. static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType) { auto Libcall = getRTLibDesc(MI.getOpcode(), Size); + + SmallVector<CallLowering::ArgInfo, 3> Args; + for (unsigned i = 1; i < MI.getNumOperands(); i++) + Args.push_back({MI.getOperand(i).getReg(), OpType}); return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType}, - {{MI.getOperand(1).getReg(), OpType}, - {MI.getOperand(2).getReg(), OpType}}); + Args); +} + +static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, + Type *FromType) { + auto ToMVT = MVT::getVT(ToType); + auto FromMVT = MVT::getVT(FromType); + + switch (Opcode) { + case TargetOpcode::G_FPEXT: + return RTLIB::getFPEXT(FromMVT, ToMVT); + case TargetOpcode::G_FPTRUNC: + return RTLIB::getFPROUND(FromMVT, ToMVT); + case TargetOpcode::G_FPTOSI: + return RTLIB::getFPTOSINT(FromMVT, ToMVT); + case TargetOpcode::G_FPTOUI: + return RTLIB::getFPTOUINT(FromMVT, ToMVT); + case TargetOpcode::G_SITOFP: + return RTLIB::getSINTTOFP(FromMVT, ToMVT); + case TargetOpcode::G_UITOFP: + return RTLIB::getUINTTOFP(FromMVT, ToMVT); + } + llvm_unreachable("Unsupported libcall function"); +} + +static LegalizerHelper::LegalizeResult +conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, + Type *FromType) { + RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType); + return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType}, + {{MI.getOperand(1).getReg(), FromType}}); } LegalizerHelper::LegalizeResult @@ -157,6 +195,7 @@ LegalizerHelper::libcall(MachineInstr &MI) { case TargetOpcode::G_FSUB: case TargetOpcode::G_FMUL: case TargetOpcode::G_FDIV: + case TargetOpcode::G_FMA: case TargetOpcode::G_FPOW: case TargetOpcode::G_FREM: { Type *HLTy = Size == 64 ? 
Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); @@ -165,6 +204,59 @@ LegalizerHelper::libcall(MachineInstr &MI) { return Status; break; } + case TargetOpcode::G_FPEXT: { + // FIXME: Support other floating point types (half, fp128 etc) + unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + if (ToSize != 64 || FromSize != 32) + return UnableToLegalize; + LegalizeResult Status = conversionLibcall( + MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx)); + if (Status != Legalized) + return Status; + break; + } + case TargetOpcode::G_FPTRUNC: { + // FIXME: Support other floating point types (half, fp128 etc) + unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + if (ToSize != 32 || FromSize != 64) + return UnableToLegalize; + LegalizeResult Status = conversionLibcall( + MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx)); + if (Status != Legalized) + return Status; + break; + } + case TargetOpcode::G_FPTOSI: + case TargetOpcode::G_FPTOUI: { + // FIXME: Support other types + unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + if (ToSize != 32 || (FromSize != 32 && FromSize != 64)) + return UnableToLegalize; + LegalizeResult Status = conversionLibcall( + MI, MIRBuilder, Type::getInt32Ty(Ctx), + FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx)); + if (Status != Legalized) + return Status; + break; + } + case TargetOpcode::G_SITOFP: + case TargetOpcode::G_UITOFP: { + // FIXME: Support other types + unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + if (FromSize != 32 || (ToSize != 32 && ToSize != 64)) + return UnableToLegalize; + LegalizeResult Status = conversionLibcall( + MI, MIRBuilder, + ToSize == 64 ? 
Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), + Type::getInt32Ty(Ctx)); + if (Status != Legalized) + return Status; + break; + } } MI.eraseFromParent(); @@ -180,8 +272,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MIRBuilder.setInstr(MI); - int64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - int64_t NarrowSize = NarrowTy.getSizeInBits(); + uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + uint64_t NarrowSize = NarrowTy.getSizeInBits(); switch (MI.getOpcode()) { default: @@ -194,11 +286,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, int NumParts = SizeOp0 / NarrowSize; SmallVector<unsigned, 2> DstRegs; - for (int i = 0; i < NumParts; ++i) { - unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildUndef(Dst); - DstRegs.push_back(Dst); - } + for (int i = 0; i < NumParts; ++i) + DstRegs.push_back( + MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg()); MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); MI.eraseFromParent(); return Legalized; @@ -249,8 +339,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); unsigned OpReg = MI.getOperand(0).getReg(); - int64_t OpStart = MI.getOperand(2).getImm(); - int64_t OpSize = MRI.getType(OpReg).getSizeInBits(); + uint64_t OpStart = MI.getOperand(2).getImm(); + uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); for (int i = 0; i < NumParts; ++i) { unsigned SrcStart = i * NarrowSize; @@ -265,7 +355,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, // OpSegStart is where this destination segment would start in OpReg if it // extended infinitely in both directions. - int64_t ExtractOffset, SegSize; + int64_t ExtractOffset; + uint64_t SegSize; if (OpStart < SrcStart) { ExtractOffset = 0; SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart); @@ -301,8 +392,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); unsigned OpReg = MI.getOperand(2).getReg(); - int64_t OpStart = MI.getOperand(3).getImm(); - int64_t OpSize = MRI.getType(OpReg).getSizeInBits(); + uint64_t OpStart = MI.getOperand(3).getImm(); + uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); for (int i = 0; i < NumParts; ++i) { unsigned DstStart = i * NarrowSize; @@ -319,7 +410,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, // OpSegStart is where this destination segment would start in OpReg if it // extended infinitely in both directions. - int64_t ExtractOffset, InsertOffset, SegSize; + int64_t ExtractOffset, InsertOffset; + uint64_t SegSize; if (OpStart < DstStart) { InsertOffset = 0; ExtractOffset = DstStart - OpStart; @@ -353,6 +445,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, // NarrowSize. if (SizeOp0 % NarrowSize != 0) return UnableToLegalize; + + const auto &MMO = **MI.memoperands_begin(); + // This implementation doesn't work for atomics. Give up instead of doing + // something invalid. 
+ if (MMO.getOrdering() != AtomicOrdering::NotAtomic || + MMO.getFailureOrdering() != AtomicOrdering::NotAtomic) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; LLT OffsetTy = LLT::scalar( MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); @@ -363,12 +463,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned SrcReg = 0; unsigned Adjustment = i * NarrowSize / 8; + MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand( + MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(), + NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8, + MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(), + MMO.getOrdering(), MMO.getFailureOrdering()); + MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy, Adjustment); - // TODO: This is conservatively correct, but we probably want to split the - // memory operands in the future. - MIRBuilder.buildLoad(DstReg, SrcReg, **MI.memoperands_begin()); + MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO); DstRegs.push_back(DstReg); } @@ -382,6 +486,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, // NarrowSize. if (SizeOp0 % NarrowSize != 0) return UnableToLegalize; + + const auto &MMO = **MI.memoperands_begin(); + // This implementation doesn't work for atomics. Give up instead of doing + // something invalid. + if (MMO.getOrdering() != AtomicOrdering::NotAtomic || + MMO.getFailureOrdering() != AtomicOrdering::NotAtomic) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; LLT OffsetTy = LLT::scalar( MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); @@ -393,12 +505,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned DstReg = 0; unsigned Adjustment = i * NarrowSize / 8; + MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand( + MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(), + NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8, + MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(), + MMO.getOrdering(), MMO.getFailureOrdering()); + MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy, Adjustment); - // TODO: This is conservatively correct, but we probably want to split the - // memory operands in the future. 
- MIRBuilder.buildStore(SrcRegs[i], DstReg, **MI.memoperands_begin()); + MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO); } MI.eraseFromParent(); return Legalized; @@ -475,6 +591,22 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, } } +void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy, + unsigned OpIdx, unsigned ExtOpcode) { + MachineOperand &MO = MI.getOperand(OpIdx); + auto ExtB = MIRBuilder.buildInstr(ExtOpcode, WideTy, MO.getReg()); + MO.setReg(ExtB->getOperand(0).getReg()); +} + +void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy, + unsigned OpIdx, unsigned TruncOpcode) { + MachineOperand &MO = MI.getOperand(OpIdx); + unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + MIRBuilder.buildInstr(TruncOpcode, MO.getReg(), DstExt); + MO.setReg(DstExt); +} + LegalizerHelper::LegalizeResult LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { MIRBuilder.setInstr(MI); @@ -482,303 +614,201 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { switch (MI.getOpcode()) { default: return UnableToLegalize; + case TargetOpcode::G_ADD: case TargetOpcode::G_AND: case TargetOpcode::G_MUL: case TargetOpcode::G_OR: case TargetOpcode::G_XOR: case TargetOpcode::G_SUB: - case TargetOpcode::G_SHL: { // Perform operation at larger width (any extension is fine here, high bits // don't affect the result) and then truncate the result back to the // original type. - unsigned Src1Ext = MRI.createGenericVirtualRegister(WideTy); - unsigned Src2Ext = MRI.createGenericVirtualRegister(WideTy); - MIRBuilder.buildAnyExt(Src1Ext, MI.getOperand(1).getReg()); - MIRBuilder.buildAnyExt(Src2Ext, MI.getOperand(2).getReg()); - - unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); - MIRBuilder.buildInstr(MI.getOpcode()) - .addDef(DstExt) - .addUse(Src1Ext) - .addUse(Src2Ext); - - MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); - MI.eraseFromParent(); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy); + MIRBuilder.recordInsertion(&MI); return Legalized; - } + + case TargetOpcode::G_SHL: + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + // The "number of bits to shift" operand must preserve its value as an + // unsigned integer: + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); + widenScalarDst(MI, WideTy); + MIRBuilder.recordInsertion(&MI); + return Legalized; + case TargetOpcode::G_SDIV: - case TargetOpcode::G_UDIV: case TargetOpcode::G_SREM: - case TargetOpcode::G_UREM: + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); + widenScalarDst(MI, WideTy); + MIRBuilder.recordInsertion(&MI); + return Legalized; + case TargetOpcode::G_ASHR: - case TargetOpcode::G_LSHR: { - unsigned ExtOp = MI.getOpcode() == TargetOpcode::G_SDIV || - MI.getOpcode() == TargetOpcode::G_SREM || - MI.getOpcode() == TargetOpcode::G_ASHR - ? 
TargetOpcode::G_SEXT - : TargetOpcode::G_ZEXT; - - unsigned LHSExt = MRI.createGenericVirtualRegister(WideTy); - MIRBuilder.buildInstr(ExtOp).addDef(LHSExt).addUse( - MI.getOperand(1).getReg()); - - unsigned RHSExt = MRI.createGenericVirtualRegister(WideTy); - MIRBuilder.buildInstr(ExtOp).addDef(RHSExt).addUse( - MI.getOperand(2).getReg()); - - unsigned ResExt = MRI.createGenericVirtualRegister(WideTy); - MIRBuilder.buildInstr(MI.getOpcode()) - .addDef(ResExt) - .addUse(LHSExt) - .addUse(RHSExt); - - MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), ResExt); - MI.eraseFromParent(); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); + // The "number of bits to shift" operand must preserve its value as an + // unsigned integer: + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); + widenScalarDst(MI, WideTy); + MIRBuilder.recordInsertion(&MI); return Legalized; - } - case TargetOpcode::G_SELECT: { + + case TargetOpcode::G_UDIV: + case TargetOpcode::G_UREM: + case TargetOpcode::G_LSHR: + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); + widenScalarDst(MI, WideTy); + MIRBuilder.recordInsertion(&MI); + return Legalized; + + case TargetOpcode::G_SELECT: if (TypeIdx != 0) return UnableToLegalize; - // Perform operation at larger width (any extension is fine here, high bits // don't affect the result) and then truncate the result back to the // original type. - unsigned Src1Ext = MRI.createGenericVirtualRegister(WideTy); - unsigned Src2Ext = MRI.createGenericVirtualRegister(WideTy); - MIRBuilder.buildAnyExt(Src1Ext, MI.getOperand(2).getReg()); - MIRBuilder.buildAnyExt(Src2Ext, MI.getOperand(3).getReg()); - - unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); - MIRBuilder.buildInstr(TargetOpcode::G_SELECT) - .addDef(DstExt) - .addReg(MI.getOperand(1).getReg()) - .addUse(Src1Ext) - .addUse(Src2Ext); - - MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); - MI.eraseFromParent(); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); + widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy); + MIRBuilder.recordInsertion(&MI); return Legalized; - } + case TargetOpcode::G_FPTOSI: - case TargetOpcode::G_FPTOUI: { + case TargetOpcode::G_FPTOUI: if (TypeIdx != 0) return UnableToLegalize; - - unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); - MIRBuilder.buildInstr(MI.getOpcode()) - .addDef(DstExt) - .addUse(MI.getOperand(1).getReg()); - - MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); - MI.eraseFromParent(); + widenScalarDst(MI, WideTy); + MIRBuilder.recordInsertion(&MI); return Legalized; - } + case TargetOpcode::G_SITOFP: - case TargetOpcode::G_UITOFP: { if (TypeIdx != 1) return UnableToLegalize; + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); + MIRBuilder.recordInsertion(&MI); + return Legalized; - unsigned Src = MI.getOperand(1).getReg(); - unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy); - - if (MI.getOpcode() == TargetOpcode::G_SITOFP) { - MIRBuilder.buildSExt(SrcExt, Src); - } else { - assert(MI.getOpcode() == TargetOpcode::G_UITOFP && "Unexpected conv op"); - MIRBuilder.buildZExt(SrcExt, Src); - } - - MIRBuilder.buildInstr(MI.getOpcode()) - .addDef(MI.getOperand(0).getReg()) - .addUse(SrcExt); - - MI.eraseFromParent(); + case TargetOpcode::G_UITOFP: + if (TypeIdx != 1) + return UnableToLegalize; + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); + MIRBuilder.recordInsertion(&MI); return Legalized; - } - case TargetOpcode::G_INSERT: { + + case 
TargetOpcode::G_INSERT: if (TypeIdx != 0) return UnableToLegalize; - - unsigned Src = MI.getOperand(1).getReg(); - unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy); - MIRBuilder.buildAnyExt(SrcExt, Src); - - unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); - auto MIB = MIRBuilder.buildInsert(DstExt, SrcExt, MI.getOperand(2).getReg(), - MI.getOperand(3).getImm()); - for (unsigned OpNum = 4; OpNum < MI.getNumOperands(); OpNum += 2) { - MIB.addReg(MI.getOperand(OpNum).getReg()); - MIB.addImm(MI.getOperand(OpNum + 1).getImm()); - } - - MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); - MI.eraseFromParent(); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy); + MIRBuilder.recordInsertion(&MI); return Legalized; - } - case TargetOpcode::G_LOAD: { - assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) == - WideTy.getSizeInBits() && - "illegal to increase number of bytes loaded"); - - unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); - MIRBuilder.buildLoad(DstExt, MI.getOperand(1).getReg(), - **MI.memoperands_begin()); - MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); - MI.eraseFromParent(); + + case TargetOpcode::G_LOAD: + // For some types like i24, we might try to widen to i32. To properly handle + // this we should be using a dedicated extending load, until then avoid + // trying to legalize. + if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) != + WideTy.getSizeInBits()) + return UnableToLegalize; + LLVM_FALLTHROUGH; + case TargetOpcode::G_SEXTLOAD: + case TargetOpcode::G_ZEXTLOAD: + widenScalarDst(MI, WideTy); + MIRBuilder.recordInsertion(&MI); return Legalized; - } + case TargetOpcode::G_STORE: { if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) || WideTy != LLT::scalar(8)) return UnableToLegalize; - auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); - auto Content = TLI.getBooleanContents(false, false); - - unsigned ExtOp = TargetOpcode::G_ANYEXT; - if (Content == TargetLoweringBase::ZeroOrOneBooleanContent) - ExtOp = TargetOpcode::G_ZEXT; - else if (Content == TargetLoweringBase::ZeroOrNegativeOneBooleanContent) - ExtOp = TargetOpcode::G_SEXT; - else - ExtOp = TargetOpcode::G_ANYEXT; - - unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy); - MIRBuilder.buildInstr(ExtOp).addDef(SrcExt).addUse( - MI.getOperand(0).getReg()); - MIRBuilder.buildStore(SrcExt, MI.getOperand(1).getReg(), - **MI.memoperands_begin()); - MI.eraseFromParent(); + widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT); + MIRBuilder.recordInsertion(&MI); return Legalized; } case TargetOpcode::G_CONSTANT: { - unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); - MIRBuilder.buildConstant(DstExt, *MI.getOperand(1).getCImm()); - MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); - MI.eraseFromParent(); + MachineOperand &SrcMO = MI.getOperand(1); + LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); + const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits()); + SrcMO.setCImm(ConstantInt::get(Ctx, Val)); + + widenScalarDst(MI, WideTy); + MIRBuilder.recordInsertion(&MI); return Legalized; } case TargetOpcode::G_FCONSTANT: { - unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); - const ConstantFP *CFP = MI.getOperand(1).getFPImm(); - APFloat Val = CFP->getValueAPF(); + MachineOperand &SrcMO = MI.getOperand(1); LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); - auto LLT2Sem = [](LLT Ty) { - switch 
(Ty.getSizeInBits()) { - case 32: - return &APFloat::IEEEsingle(); - break; - case 64: - return &APFloat::IEEEdouble(); - break; - default: - llvm_unreachable("Unhandled fp widen type"); - } - }; + APFloat Val = SrcMO.getFPImm()->getValueAPF(); bool LosesInfo; - Val.convert(*LLT2Sem(WideTy), APFloat::rmTowardZero, &LosesInfo); - MIRBuilder.buildFConstant(DstExt, *ConstantFP::get(Ctx, Val)); - MIRBuilder.buildFPTrunc(MI.getOperand(0).getReg(), DstExt); - MI.eraseFromParent(); + switch (WideTy.getSizeInBits()) { + case 32: + Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo); + break; + case 64: + Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo); + break; + default: + llvm_unreachable("Unhandled fp widen type"); + } + SrcMO.setFPImm(ConstantFP::get(Ctx, Val)); + + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + MIRBuilder.recordInsertion(&MI); return Legalized; } - case TargetOpcode::G_BRCOND: { - unsigned TstExt = MRI.createGenericVirtualRegister(WideTy); - MIRBuilder.buildAnyExt(TstExt, MI.getOperand(0).getReg()); - MIRBuilder.buildBrCond(TstExt, *MI.getOperand(1).getMBB()); - MI.eraseFromParent(); + case TargetOpcode::G_BRCOND: + widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT); + MIRBuilder.recordInsertion(&MI); return Legalized; - } - case TargetOpcode::G_FCMP: { - unsigned Op0Ext, Op1Ext, DstReg; - unsigned Cmp1 = MI.getOperand(2).getReg(); - unsigned Cmp2 = MI.getOperand(3).getReg(); - if (TypeIdx == 0) { - Op0Ext = Cmp1; - Op1Ext = Cmp2; - DstReg = MRI.createGenericVirtualRegister(WideTy); - } else { - Op0Ext = MRI.createGenericVirtualRegister(WideTy); - Op1Ext = MRI.createGenericVirtualRegister(WideTy); - DstReg = MI.getOperand(0).getReg(); - MIRBuilder.buildInstr(TargetOpcode::G_FPEXT, Op0Ext, Cmp1); - MIRBuilder.buildInstr(TargetOpcode::G_FPEXT, Op1Ext, Cmp2); - } - MIRBuilder.buildFCmp( - static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()), - DstReg, Op0Ext, Op1Ext); + + case TargetOpcode::G_FCMP: if (TypeIdx == 0) - MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, MI.getOperand(0).getReg(), - DstReg); - MI.eraseFromParent(); - return Legalized; - } - case TargetOpcode::G_ICMP: { - bool IsSigned = CmpInst::isSigned( - static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate())); - unsigned Cmp1 = MI.getOperand(2).getReg(); - unsigned Cmp2 = MI.getOperand(3).getReg(); - unsigned Op0Ext, Op1Ext, DstReg; - if (TypeIdx == 0) { - Op0Ext = Cmp1; - Op1Ext = Cmp2; - DstReg = MRI.createGenericVirtualRegister(WideTy); - } else { - Op0Ext = MRI.createGenericVirtualRegister(WideTy); - Op1Ext = MRI.createGenericVirtualRegister(WideTy); - DstReg = MI.getOperand(0).getReg(); - if (IsSigned) { - MIRBuilder.buildSExt(Op0Ext, Cmp1); - MIRBuilder.buildSExt(Op1Ext, Cmp2); - } else { - MIRBuilder.buildZExt(Op0Ext, Cmp1); - MIRBuilder.buildZExt(Op1Ext, Cmp2); - } + widenScalarDst(MI, WideTy); + else { + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT); + widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT); } - MIRBuilder.buildICmp( - static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()), - DstReg, Op0Ext, Op1Ext); + MIRBuilder.recordInsertion(&MI); + return Legalized; + + case TargetOpcode::G_ICMP: if (TypeIdx == 0) - MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, MI.getOperand(0).getReg(), - DstReg); - MI.eraseFromParent(); + widenScalarDst(MI, WideTy); + else { + unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>( + MI.getOperand(1).getPredicate())) + ? 
TargetOpcode::G_SEXT + : TargetOpcode::G_ZEXT; + widenScalarSrc(MI, WideTy, 2, ExtOpcode); + widenScalarSrc(MI, WideTy, 3, ExtOpcode); + } + MIRBuilder.recordInsertion(&MI); return Legalized; - } - case TargetOpcode::G_GEP: { + + case TargetOpcode::G_GEP: assert(TypeIdx == 1 && "unable to legalize pointer of GEP"); - unsigned OffsetExt = MRI.createGenericVirtualRegister(WideTy); - MIRBuilder.buildSExt(OffsetExt, MI.getOperand(2).getReg()); - MI.getOperand(2).setReg(OffsetExt); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); + MIRBuilder.recordInsertion(&MI); return Legalized; - } + case TargetOpcode::G_PHI: { assert(TypeIdx == 0 && "Expecting only Idx 0"); - auto getExtendedReg = [&](unsigned Reg, MachineBasicBlock &MBB) { - auto FirstTermIt = MBB.getFirstTerminator(); - MIRBuilder.setInsertPt(MBB, FirstTermIt); - MachineInstr *DefMI = MRI.getVRegDef(Reg); - MachineInstrBuilder MIB; - if (DefMI->getOpcode() == TargetOpcode::G_TRUNC) - MIB = MIRBuilder.buildAnyExtOrTrunc(WideTy, - DefMI->getOperand(1).getReg()); - else - MIB = MIRBuilder.buildAnyExt(WideTy, Reg); - return MIB->getOperand(0).getReg(); - }; - auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, WideTy); - for (auto OpIt = MI.operands_begin() + 1, OpE = MI.operands_end(); - OpIt != OpE;) { - unsigned Reg = OpIt++->getReg(); - MachineBasicBlock *OpMBB = OpIt++->getMBB(); - MIB.addReg(getExtendedReg(Reg, *OpMBB)); - MIB.addMBB(OpMBB); + + for (unsigned I = 1; I < MI.getNumOperands(); I += 2) { + MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); + MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); + widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT); } - auto *MBB = MI.getParent(); - MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI()); - MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), - MIB->getOperand(0).getReg()); - MI.eraseFromParent(); + + MachineBasicBlock &MBB = *MI.getParent(); + MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); + widenScalarDst(MI, WideTy); + MIRBuilder.recordInsertion(&MI); return Legalized; } } @@ -874,11 +904,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { } ConstantFP &ZeroForNegation = *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy)); - unsigned Zero = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildFConstant(Zero, ZeroForNegation); + auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation); MIRBuilder.buildInstr(TargetOpcode::G_FSUB) .addDef(Res) - .addUse(Zero) + .addUse(Zero->getOperand(0).getReg()) .addUse(MI.getOperand(1).getReg()); MI.eraseFromParent(); return Legalized; @@ -887,7 +916,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)). // First, check if G_FNEG is marked as Lower. If so, we may // end up with an infinite loop as G_FSUB is used to legalize G_FNEG. 
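// The widenScalar cases above replace the old hand-rolled ext/trunc chains
// with two small helpers plus recordInsertion(&MI), which notifies the
// observer that MI was mutated in place. A minimal sketch of what
// widenScalarSrc and widenScalarDst are assumed to do (illustrative only; the
// real helpers in LegalizerHelper may differ in detail, and in the header
// OpIdx/TruncOpcode are assumed to default to 0 and G_TRUNC):
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  // Extend the original source into a wide vreg, then rewrite the operand so
  // MI now consumes the widened value.
  unsigned ExtReg = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.buildInstr(ExtOpcode).addDef(ExtReg).addUse(MO.getReg());
  MO.setReg(ExtReg);
}

void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  // Define the result in a wide vreg and truncate it back into the original
  // register right after MI, so existing users are unaffected. Callers such
  // as the G_FCONSTANT case pass G_FPTRUNC instead of the default G_TRUNC.
  unsigned WideReg = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.setInsertPt(*MI.getParent(), std::next(MI.getIterator()));
  MIRBuilder.buildInstr(TruncOpcode).addDef(MO.getReg()).addUse(WideReg);
  MO.setReg(WideReg);
}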
- if (LI.getAction({G_FNEG, Ty}).first == LegalizerInfo::Lower) + if (LI.getAction({G_FNEG, {Ty}}).Action == Lower) return UnableToLegalize; unsigned Res = MI.getOperand(0).getReg(); unsigned LHS = MI.getOperand(1).getReg(); @@ -913,6 +942,48 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_LOAD: + case TargetOpcode::G_SEXTLOAD: + case TargetOpcode::G_ZEXTLOAD: { + // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned PtrReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + auto &MMO = **MI.memoperands_begin(); + + if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) { + // In the case of G_LOAD, this was a non-extending load already and we're + // about to lower to the same instruction. + if (MI.getOpcode() == TargetOpcode::G_LOAD) + return UnableToLegalize; + MIRBuilder.buildLoad(DstReg, PtrReg, MMO); + MI.eraseFromParent(); + return Legalized; + } + + if (DstTy.isScalar()) { + unsigned TmpReg = MRI.createGenericVirtualRegister( + LLT::scalar(MMO.getSize() /* in bytes */ * 8)); + MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case TargetOpcode::G_LOAD: + MIRBuilder.buildAnyExt(DstReg, TmpReg); + break; + case TargetOpcode::G_SEXTLOAD: + MIRBuilder.buildSExt(DstReg, TmpReg); + break; + case TargetOpcode::G_ZEXTLOAD: + MIRBuilder.buildZExt(DstReg, TmpReg); + break; + } + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; + } } } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 9c27c59a0654..ae061b64a38c 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -24,12 +24,87 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/MathExtras.h" #include <algorithm> #include <map> + using namespace llvm; +using namespace LegalizeActions; + +#define DEBUG_TYPE "legalizer-info" + +cl::opt<bool> llvm::DisableGISelLegalityCheck( + "disable-gisel-legality-check", + cl::desc("Don't verify that MIR is fully legal between GlobalISel passes"), + cl::Hidden); + +raw_ostream &LegalityQuery::print(raw_ostream &OS) const { + OS << Opcode << ", Tys={"; + for (const auto &Type : Types) { + OS << Type << ", "; + } + OS << "}, Opcode="; + + OS << Opcode << ", MMOs={"; + for (const auto &MMODescr : MMODescrs) { + OS << MMODescr.Size << ", "; + } + OS << "}"; + + return OS; +} + +LegalizeActionStep LegalizeRuleSet::apply(const LegalityQuery &Query) const { + LLVM_DEBUG(dbgs() << "Applying legalizer ruleset to: "; Query.print(dbgs()); + dbgs() << "\n"); + if (Rules.empty()) { + LLVM_DEBUG(dbgs() << ".. fallback to legacy rules (no rules defined)\n"); + return {LegalizeAction::UseLegacyRules, 0, LLT{}}; + } + for (const auto &Rule : Rules) { + if (Rule.match(Query)) { + LLVM_DEBUG(dbgs() << ".. match\n"); + std::pair<unsigned, LLT> Mutation = Rule.determineMutation(Query); + LLVM_DEBUG(dbgs() << ".. .. 
" << (unsigned)Rule.getAction() << ", " + << Mutation.first << ", " << Mutation.second << "\n"); + assert((Query.Types[Mutation.first] != Mutation.second || + Rule.getAction() == Lower || + Rule.getAction() == MoreElements || + Rule.getAction() == FewerElements) && + "Simple loop detected"); + return {Rule.getAction(), Mutation.first, Mutation.second}; + } else + LLVM_DEBUG(dbgs() << ".. no match\n"); + } + LLVM_DEBUG(dbgs() << ".. unsupported\n"); + return {LegalizeAction::Unsupported, 0, LLT{}}; +} + +bool LegalizeRuleSet::verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const { +#ifndef NDEBUG + if (Rules.empty()) { + LLVM_DEBUG( + dbgs() << ".. type index coverage check SKIPPED: no rules defined\n"); + return true; + } + const int64_t FirstUncovered = TypeIdxsCovered.find_first_unset(); + if (FirstUncovered < 0) { + LLVM_DEBUG(dbgs() << ".. type index coverage check SKIPPED:" + " user-defined predicate detected\n"); + return true; + } + const bool AllCovered = (FirstUncovered >= NumTypeIdxs); + LLVM_DEBUG(dbgs() << ".. the first uncovered type index: " << FirstUncovered + << ", " << (AllCovered ? "OK" : "FAIL") << "\n"); + return AllCovered; +#else + return true; +#endif +} LegalizerInfo::LegalizerInfo() : TablesInitialized(false) { // Set defaults. @@ -104,15 +179,16 @@ void LegalizerInfo::computeTables() { if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() && ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr) S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx]; - std::sort(ScalarSpecifiedActions.begin(), ScalarSpecifiedActions.end()); + llvm::sort(ScalarSpecifiedActions.begin(), + ScalarSpecifiedActions.end()); checkPartialSizeAndActionsVector(ScalarSpecifiedActions); setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions)); } // 2. Handle pointer types for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) { - std::sort(PointerSpecifiedActions.second.begin(), - PointerSpecifiedActions.second.end()); + llvm::sort(PointerSpecifiedActions.second.begin(), + PointerSpecifiedActions.second.end()); checkPartialSizeAndActionsVector(PointerSpecifiedActions.second); // For pointer types, we assume that there isn't a meaningfull way // to change the number of bits used in the pointer. @@ -124,8 +200,8 @@ void LegalizerInfo::computeTables() { // 3. Handle vector types SizeAndActionsVec ElementSizesSeen; for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) { - std::sort(VectorSpecifiedActions.second.begin(), - VectorSpecifiedActions.second.end()); + llvm::sort(VectorSpecifiedActions.second.begin(), + VectorSpecifiedActions.second.end()); const uint16_t ElementSize = VectorSpecifiedActions.first; ElementSizesSeen.push_back({ElementSize, Legal}); checkPartialSizeAndActionsVector(VectorSpecifiedActions.second); @@ -143,7 +219,7 @@ void LegalizerInfo::computeTables() { Opcode, TypeIdx, ElementSize, moreToWiderTypesAndLessToWidest(NumElementsActions)); } - std::sort(ElementSizesSeen.begin(), ElementSizesSeen.end()); + llvm::sort(ElementSizesSeen.begin(), ElementSizesSeen.end()); SizeChangeStrategy VectorElementSizeChangeStrategy = &unsupportedForDifferentSizes; if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() && @@ -162,8 +238,8 @@ void LegalizerInfo::computeTables() { // probably going to need specialized lookup structures for various types before // we have any hope of doing well with something like <13 x i3>. Even the common // cases should do better than what we have now. 
-std::pair<LegalizerInfo::LegalizeAction, LLT> -LegalizerInfo::getAction(const InstrAspect &Aspect) const { +std::pair<LegalizeAction, LLT> +LegalizerInfo::getAspectAction(const InstrAspect &Aspect) const { assert(TablesInitialized && "backend forgot to call computeTables"); // These *have* to be implemented for now, they're the fundamental basis of // how everything else is transformed. @@ -186,9 +262,87 @@ static LLT getTypeFromTypeIdx(const MachineInstr &MI, return MRI.getType(MI.getOperand(OpIdx).getReg()); } -std::tuple<LegalizerInfo::LegalizeAction, unsigned, LLT> +unsigned LegalizerInfo::getOpcodeIdxForOpcode(unsigned Opcode) const { + assert(Opcode >= FirstOp && Opcode <= LastOp && "Unsupported opcode"); + return Opcode - FirstOp; +} + +unsigned LegalizerInfo::getActionDefinitionsIdx(unsigned Opcode) const { + unsigned OpcodeIdx = getOpcodeIdxForOpcode(Opcode); + if (unsigned Alias = RulesForOpcode[OpcodeIdx].getAlias()) { + LLVM_DEBUG(dbgs() << ".. opcode " << Opcode << " is aliased to " << Alias + << "\n"); + OpcodeIdx = getOpcodeIdxForOpcode(Alias); + LLVM_DEBUG(dbgs() << ".. opcode " << Alias << " is aliased to " + << RulesForOpcode[OpcodeIdx].getAlias() << "\n"); + assert(RulesForOpcode[OpcodeIdx].getAlias() == 0 && "Cannot chain aliases"); + } + + return OpcodeIdx; +} + +const LegalizeRuleSet & +LegalizerInfo::getActionDefinitions(unsigned Opcode) const { + unsigned OpcodeIdx = getActionDefinitionsIdx(Opcode); + return RulesForOpcode[OpcodeIdx]; +} + +LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder(unsigned Opcode) { + unsigned OpcodeIdx = getActionDefinitionsIdx(Opcode); + auto &Result = RulesForOpcode[OpcodeIdx]; + assert(!Result.isAliasedByAnother() && "Modifying this opcode will modify aliases"); + return Result; +} + +LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder( + std::initializer_list<unsigned> Opcodes) { + unsigned Representative = *Opcodes.begin(); + + assert(Opcodes.begin() != Opcodes.end() && + Opcodes.begin() + 1 != Opcodes.end() && + "Initializer list must have at least two opcodes"); + + for (auto I = Opcodes.begin() + 1, E = Opcodes.end(); I != E; ++I) + aliasActionDefinitions(Representative, *I); + + auto &Return = getActionDefinitionsBuilder(Representative); + Return.setIsAliasedByAnother(); + return Return; +} + +void LegalizerInfo::aliasActionDefinitions(unsigned OpcodeTo, + unsigned OpcodeFrom) { + assert(OpcodeTo != OpcodeFrom && "Cannot alias to self"); + assert(OpcodeTo >= FirstOp && OpcodeTo <= LastOp && "Unsupported opcode"); + const unsigned OpcodeFromIdx = getOpcodeIdxForOpcode(OpcodeFrom); + RulesForOpcode[OpcodeFromIdx].aliasTo(OpcodeTo); +} + +LegalizeActionStep +LegalizerInfo::getAction(const LegalityQuery &Query) const { + LegalizeActionStep Step = getActionDefinitions(Query.Opcode).apply(Query); + if (Step.Action != LegalizeAction::UseLegacyRules) { + return Step; + } + + for (unsigned i = 0; i < Query.Types.size(); ++i) { + auto Action = getAspectAction({Query.Opcode, i, Query.Types[i]}); + if (Action.first != Legal) { + LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i + << " Action=" << (unsigned)Action.first << ", " + << Action.second << "\n"); + return {Action.first, i, Action.second}; + } else + LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Legal\n"); + } + LLVM_DEBUG(dbgs() << ".. 
(legacy) Legal\n"); + return {Legal, 0, LLT{}}; +} + +LegalizeActionStep LegalizerInfo::getAction(const MachineInstr &MI, const MachineRegisterInfo &MRI) const { + SmallVector<LLT, 2> Types; SmallBitVector SeenTypes(8); const MCOperandInfo *OpInfo = MI.getDesc().OpInfo; // FIXME: probably we'll need to cache the results here somehow? @@ -205,16 +359,20 @@ LegalizerInfo::getAction(const MachineInstr &MI, SeenTypes.set(TypeIdx); LLT Ty = getTypeFromTypeIdx(MI, MRI, i, TypeIdx); - auto Action = getAction({MI.getOpcode(), TypeIdx, Ty}); - if (Action.first != Legal) - return std::make_tuple(Action.first, TypeIdx, Action.second); + Types.push_back(Ty); } - return std::make_tuple(Legal, 0, LLT{}); + + SmallVector<LegalityQuery::MemDesc, 2> MemDescrs; + for (const auto &MMO : MI.memoperands()) + MemDescrs.push_back( + {MMO->getSize() /* in bytes */ * 8, MMO->getOrdering()}); + + return getAction({MI.getOpcode(), Types, MemDescrs}); } bool LegalizerInfo::isLegal(const MachineInstr &MI, const MachineRegisterInfo &MRI) const { - return std::get<0>(getAction(MI, MRI)) == Legal; + return getAction(MI, MRI).Action == Legal; } bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, @@ -312,17 +470,18 @@ LegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) { case Unsupported: return {Size, Unsupported}; case NotFound: + case UseLegacyRules: llvm_unreachable("NotFound"); } llvm_unreachable("Action has an unknown enum value"); } -std::pair<LegalizerInfo::LegalizeAction, LLT> +std::pair<LegalizeAction, LLT> LegalizerInfo::findScalarLegalAction(const InstrAspect &Aspect) const { assert(Aspect.Type.isScalar() || Aspect.Type.isPointer()); if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp) return {NotFound, LLT()}; - const unsigned OpcodeIdx = Aspect.Opcode - FirstOp; + const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode); if (Aspect.Type.isPointer() && AddrSpace2PointerActions[OpcodeIdx].find(Aspect.Type.getAddressSpace()) == AddrSpace2PointerActions[OpcodeIdx].end()) { @@ -346,14 +505,14 @@ LegalizerInfo::findScalarLegalAction(const InstrAspect &Aspect) const { SizeAndAction.first)}; } -std::pair<LegalizerInfo::LegalizeAction, LLT> +std::pair<LegalizeAction, LLT> LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const { assert(Aspect.Type.isVector()); // First legalize the vector element size, then legalize the number of // lanes in the vector. if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp) return {NotFound, Aspect.Type}; - const unsigned OpcodeIdx = Aspect.Opcode - FirstOp; + const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode); const unsigned TypeIdx = Aspect.Idx; if (TypeIdx >= ScalarInVectorActions[OpcodeIdx].size()) return {NotFound, Aspect.Type}; @@ -380,3 +539,53 @@ LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const { LLT::vector(NumElementsAndAction.first, IntermediateType.getScalarSizeInBits())}; } + +/// \pre Type indices of every opcode form a dense set starting from 0. +void LegalizerInfo::verify(const MCInstrInfo &MII) const { +#ifndef NDEBUG + std::vector<unsigned> FailedOpcodes; + for (unsigned Opcode = FirstOp; Opcode <= LastOp; ++Opcode) { + const MCInstrDesc &MCID = MII.get(Opcode); + const unsigned NumTypeIdxs = std::accumulate( + MCID.opInfo_begin(), MCID.opInfo_end(), 0U, + [](unsigned Acc, const MCOperandInfo &OpInfo) { + return OpInfo.isGenericType() + ? 
std::max(OpInfo.getGenericTypeIndex() + 1U, Acc) + : Acc; + }); + LLVM_DEBUG(dbgs() << MII.getName(Opcode) << " (opcode " << Opcode + << "): " << NumTypeIdxs << " type ind" + << (NumTypeIdxs == 1 ? "ex" : "ices") << "\n"); + const LegalizeRuleSet &RuleSet = getActionDefinitions(Opcode); + if (!RuleSet.verifyTypeIdxsCoverage(NumTypeIdxs)) + FailedOpcodes.push_back(Opcode); + } + if (!FailedOpcodes.empty()) { + errs() << "The following opcodes have ill-defined legalization rules:"; + for (unsigned Opcode : FailedOpcodes) + errs() << " " << MII.getName(Opcode); + errs() << "\n"; + + report_fatal_error("ill-defined LegalizerInfo" + ", try -debug-only=legalizer-info for details"); + } +#endif +} + +#ifndef NDEBUG +// FIXME: This should be in the MachineVerifier, but it can't use the +// LegalizerInfo as it's currently in the separate GlobalISel library. +// Note that RegBankSelected property already checked in the verifier +// has the same layering problem, but we only use inline methods so +// end up not needing to link against the GlobalISel library. +const MachineInstr *llvm::machineFunctionIsIllegal(const MachineFunction &MF) { + if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const MachineBasicBlock &MBB : MF) + for (const MachineInstr &MI : MBB) + if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) + return &MI; + } + return nullptr; +} +#endif diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index 8e16470b6f90..52b340753a50 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -44,6 +44,11 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) { } } +void Localizer::getAnalysisUsage(AnalysisUsage &AU) const { + getSelectionDAGFallbackAnalysisUsage(AU); + MachineFunctionPass::getAnalysisUsage(AU); +} + bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def, MachineBasicBlock *&InsertMBB) { MachineInstr &MIUse = *MOUse.getParent(); @@ -59,7 +64,7 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) { MachineFunctionProperties::Property::FailedISel)) return false; - DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n'); + LLVM_DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n'); init(MF); @@ -73,7 +78,7 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) { for (MachineInstr &MI : MBB) { if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI)) continue; - DEBUG(dbgs() << "Should localize: " << MI); + LLVM_DEBUG(dbgs() << "Should localize: " << MI); assert(MI.getDesc().getNumDefs() == 1 && "More than one definition not supported yet"); unsigned Reg = MI.getOperand(0).getReg(); @@ -85,12 +90,12 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) { MachineOperand &MOUse = *MOIt++; // Check if the use is already local. 
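// With the rule-based API added to LegalizerInfo.cpp above, a target's
// LegalizerInfo constructor can describe legality declaratively instead of
// populating the legacy per-type tables; every opcode named in one
// initializer list is aliased to a single shared LegalizeRuleSet, and opcodes
// with no rules fall back to the legacy tables via UseLegacyRules. A minimal
// usage sketch (the helper names legalFor and clampScalar are assumptions
// about the rule-builder interface, not taken from this patch):
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
getActionDefinitionsBuilder(
    {TargetOpcode::G_ADD, TargetOpcode::G_SUB, TargetOpcode::G_MUL})
    .legalFor({s32, s64})       // already legal for 32/64-bit scalars
    .clampScalar(0, s32, s64);  // widen narrower types, narrow wider ones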
MachineBasicBlock *InsertMBB; - DEBUG(MachineInstr &MIUse = *MOUse.getParent(); - dbgs() << "Checking use: " << MIUse - << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n'); + LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent(); + dbgs() << "Checking use: " << MIUse + << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n'); if (isLocalUse(MOUse, MI, InsertMBB)) continue; - DEBUG(dbgs() << "Fixing non-local use\n"); + LLVM_DEBUG(dbgs() << "Fixing non-local use\n"); Changed = true; auto MBBAndReg = std::make_pair(InsertMBB, Reg); auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg); @@ -111,10 +116,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) { LocalizedMI->getOperand(0).setReg(NewReg); NewVRegIt = MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first; - DEBUG(dbgs() << "Inserted: " << *LocalizedMI); + LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI); } - DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second) - << '\n'); + LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second) + << '\n'); // Update the user reg. MOUse.setReg(NewVRegIt->second); } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 475bb82e5b9c..9df931eb81b3 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -22,96 +22,103 @@ using namespace llvm; -void MachineIRBuilder::setMF(MachineFunction &MF) { - this->MF = &MF; - this->MBB = nullptr; - this->MRI = &MF.getRegInfo(); - this->TII = MF.getSubtarget().getInstrInfo(); - this->DL = DebugLoc(); - this->II = MachineBasicBlock::iterator(); - this->InsertedInstr = nullptr; -} - -void MachineIRBuilder::setMBB(MachineBasicBlock &MBB) { - this->MBB = &MBB; - this->II = MBB.end(); +void MachineIRBuilderBase::setMF(MachineFunction &MF) { + State.MF = &MF; + State.MBB = nullptr; + State.MRI = &MF.getRegInfo(); + State.TII = MF.getSubtarget().getInstrInfo(); + State.DL = DebugLoc(); + State.II = MachineBasicBlock::iterator(); + State.InsertedInstr = nullptr; +} + +void MachineIRBuilderBase::setMBB(MachineBasicBlock &MBB) { + State.MBB = &MBB; + State.II = MBB.end(); assert(&getMF() == MBB.getParent() && "Basic block is in a different function"); } -void MachineIRBuilder::setInstr(MachineInstr &MI) { +void MachineIRBuilderBase::setInstr(MachineInstr &MI) { assert(MI.getParent() && "Instruction is not part of a basic block"); setMBB(*MI.getParent()); - this->II = MI.getIterator(); + State.II = MI.getIterator(); } -void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB, - MachineBasicBlock::iterator II) { +void MachineIRBuilderBase::setInsertPt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator II) { assert(MBB.getParent() == &getMF() && "Basic block is in a different function"); - this->MBB = &MBB; - this->II = II; + State.MBB = &MBB; + State.II = II; } -void MachineIRBuilder::recordInsertions( +void MachineIRBuilderBase::recordInsertion(MachineInstr *InsertedInstr) const { + if (State.InsertedInstr) + State.InsertedInstr(InsertedInstr); +} + +void MachineIRBuilderBase::recordInsertions( std::function<void(MachineInstr *)> Inserted) { - InsertedInstr = std::move(Inserted); + State.InsertedInstr = std::move(Inserted); } -void MachineIRBuilder::stopRecordingInsertions() { - InsertedInstr = nullptr; +void MachineIRBuilderBase::stopRecordingInsertions() { + State.InsertedInstr = nullptr; } //------------------------------------------------------------------------------ // 
Build instruction variants. //------------------------------------------------------------------------------ -MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opcode) { +MachineInstrBuilder MachineIRBuilderBase::buildInstr(unsigned Opcode) { return insertInstr(buildInstrNoInsert(Opcode)); } -MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) { - MachineInstrBuilder MIB = BuildMI(getMF(), DL, getTII().get(Opcode)); +MachineInstrBuilder MachineIRBuilderBase::buildInstrNoInsert(unsigned Opcode) { + MachineInstrBuilder MIB = BuildMI(getMF(), getDL(), getTII().get(Opcode)); return MIB; } - -MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) { +MachineInstrBuilder MachineIRBuilderBase::insertInstr(MachineInstrBuilder MIB) { getMBB().insert(getInsertPt(), MIB); - if (InsertedInstr) - InsertedInstr(MIB); + recordInsertion(MIB); return MIB; } MachineInstrBuilder -MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable, - const MDNode *Expr) { +MachineIRBuilderBase::buildDirectDbgValue(unsigned Reg, const MDNode *Variable, + const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); - assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && - "Expected inlined-at fields to agree"); - return insertInstr(BuildMI(getMF(), DL, getTII().get(TargetOpcode::DBG_VALUE), + assert( + cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && + "Expected inlined-at fields to agree"); + return insertInstr(BuildMI(getMF(), getDL(), + getTII().get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ false, Reg, Variable, Expr)); } -MachineInstrBuilder -MachineIRBuilder::buildIndirectDbgValue(unsigned Reg, const MDNode *Variable, - const MDNode *Expr) { +MachineInstrBuilder MachineIRBuilderBase::buildIndirectDbgValue( + unsigned Reg, const MDNode *Variable, const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); - assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && - "Expected inlined-at fields to agree"); - return insertInstr(BuildMI(getMF(), DL, getTII().get(TargetOpcode::DBG_VALUE), + assert( + cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && + "Expected inlined-at fields to agree"); + return insertInstr(BuildMI(getMF(), getDL(), + getTII().get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, Reg, Variable, Expr)); } -MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI, - const MDNode *Variable, - const MDNode *Expr) { +MachineInstrBuilder +MachineIRBuilderBase::buildFIDbgValue(int FI, const MDNode *Variable, + const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); - assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && - "Expected inlined-at fields to agree"); + assert( + cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && + "Expected inlined-at fields to agree"); return buildInstr(TargetOpcode::DBG_VALUE) .addFrameIndex(FI) .addImm(0) @@ -119,13 +126,13 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI, .addMetadata(Expr); } -MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, - const MDNode *Variable, - const MDNode *Expr) { +MachineInstrBuilder MachineIRBuilderBase::buildConstDbgValue( + const Constant &C, 
const MDNode *Variable, const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); - assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && - "Expected inlined-at fields to agree"); + assert( + cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && + "Expected inlined-at fields to agree"); auto MIB = buildInstr(TargetOpcode::DBG_VALUE); if (auto *CI = dyn_cast<ConstantInt>(&C)) { if (CI->getBitWidth() > 64) @@ -142,17 +149,18 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr); } -MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) { - assert(MRI->getType(Res).isPointer() && "invalid operand type"); +MachineInstrBuilder MachineIRBuilderBase::buildFrameIndex(unsigned Res, + int Idx) { + assert(getMRI()->getType(Res).isPointer() && "invalid operand type"); return buildInstr(TargetOpcode::G_FRAME_INDEX) .addDef(Res) .addFrameIndex(Idx); } -MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res, - const GlobalValue *GV) { - assert(MRI->getType(Res).isPointer() && "invalid operand type"); - assert(MRI->getType(Res).getAddressSpace() == +MachineInstrBuilder +MachineIRBuilderBase::buildGlobalValue(unsigned Res, const GlobalValue *GV) { + assert(getMRI()->getType(Res).isPointer() && "invalid operand type"); + assert(getMRI()->getType(Res).getAddressSpace() == GV->getType()->getAddressSpace() && "address space mismatch"); @@ -161,29 +169,20 @@ MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res, .addGlobalAddress(GV); } -MachineInstrBuilder MachineIRBuilder::buildBinaryOp(unsigned Opcode, unsigned Res, unsigned Op0, - unsigned Op1) { - assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && +void MachineIRBuilderBase::validateBinaryOp(unsigned Res, unsigned Op0, + unsigned Op1) { + assert((getMRI()->getType(Res).isScalar() || + getMRI()->getType(Res).isVector()) && "invalid operand type"); - assert(MRI->getType(Res) == MRI->getType(Op0) && - MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); - - return buildInstr(Opcode) - .addDef(Res) - .addUse(Op0) - .addUse(Op1); -} - -MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0, - unsigned Op1) { - return buildBinaryOp(TargetOpcode::G_ADD, Res, Op0, Op1); + assert(getMRI()->getType(Res) == getMRI()->getType(Op0) && + getMRI()->getType(Res) == getMRI()->getType(Op1) && "type mismatch"); } -MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0, - unsigned Op1) { - assert(MRI->getType(Res).isPointer() && - MRI->getType(Res) == MRI->getType(Op0) && "type mismatch"); - assert(MRI->getType(Op1).isScalar() && "invalid offset type"); +MachineInstrBuilder MachineIRBuilderBase::buildGEP(unsigned Res, unsigned Op0, + unsigned Op1) { + assert(getMRI()->getType(Res).isPointer() && + getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch"); + assert(getMRI()->getType(Op1).isScalar() && "invalid offset type"); return buildInstr(TargetOpcode::G_GEP) .addDef(Res) @@ -192,8 +191,8 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0, } Optional<MachineInstrBuilder> -MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0, - const LLT &ValueTy, uint64_t Value) { +MachineIRBuilderBase::materializeGEP(unsigned &Res, unsigned Op0, + const LLT &ValueTy, uint64_t Value) { assert(Res == 0 && "Res is a 
result argument"); assert(ValueTy.isScalar() && "invalid offset type"); @@ -202,17 +201,18 @@ MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0, return None; } - Res = MRI->createGenericVirtualRegister(MRI->getType(Op0)); - unsigned TmpReg = MRI->createGenericVirtualRegister(ValueTy); + Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0)); + unsigned TmpReg = getMRI()->createGenericVirtualRegister(ValueTy); buildConstant(TmpReg, Value); return buildGEP(Res, Op0, TmpReg); } -MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0, - uint32_t NumBits) { - assert(MRI->getType(Res).isPointer() && - MRI->getType(Res) == MRI->getType(Op0) && "type mismatch"); +MachineInstrBuilder MachineIRBuilderBase::buildPtrMask(unsigned Res, + unsigned Op0, + uint32_t NumBits) { + assert(getMRI()->getType(Res).isPointer() && + getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch"); return buildInstr(TargetOpcode::G_PTR_MASK) .addDef(Res) @@ -220,92 +220,88 @@ MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0, .addImm(NumBits); } -MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0, - unsigned Op1) { - return buildBinaryOp(TargetOpcode::G_SUB, Res, Op0, Op1); -} - -MachineInstrBuilder MachineIRBuilder::buildMul(unsigned Res, unsigned Op0, - unsigned Op1) { - return buildBinaryOp(TargetOpcode::G_MUL, Res, Op0, Op1); -} - -MachineInstrBuilder MachineIRBuilder::buildAnd(unsigned Res, unsigned Op0, - unsigned Op1) { - return buildBinaryOp(TargetOpcode::G_AND, Res, Op0, Op1); -} - -MachineInstrBuilder MachineIRBuilder::buildOr(unsigned Res, unsigned Op0, - unsigned Op1) { - return buildBinaryOp(TargetOpcode::G_OR, Res, Op0, Op1); -} - -MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { +MachineInstrBuilder MachineIRBuilderBase::buildBr(MachineBasicBlock &Dest) { return buildInstr(TargetOpcode::G_BR).addMBB(&Dest); } -MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) { - assert(MRI->getType(Tgt).isPointer() && "invalid branch destination"); +MachineInstrBuilder MachineIRBuilderBase::buildBrIndirect(unsigned Tgt) { + assert(getMRI()->getType(Tgt).isPointer() && "invalid branch destination"); return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt); } -MachineInstrBuilder MachineIRBuilder::buildCopy(unsigned Res, unsigned Op) { - assert(MRI->getType(Res) == LLT() || MRI->getType(Op) == LLT() || - MRI->getType(Res) == MRI->getType(Op)); +MachineInstrBuilder MachineIRBuilderBase::buildCopy(unsigned Res, unsigned Op) { + assert(getMRI()->getType(Res) == LLT() || getMRI()->getType(Op) == LLT() || + getMRI()->getType(Res) == getMRI()->getType(Op)); return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op); } -MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res, - const ConstantInt &Val) { - LLT Ty = MRI->getType(Res); +MachineInstrBuilder +MachineIRBuilderBase::buildConstant(unsigned Res, const ConstantInt &Val) { + LLT Ty = getMRI()->getType(Res); assert((Ty.isScalar() || Ty.isPointer()) && "invalid operand type"); const ConstantInt *NewVal = &Val; if (Ty.getSizeInBits() != Val.getBitWidth()) - NewVal = ConstantInt::get(MF->getFunction().getContext(), + NewVal = ConstantInt::get(getMF().getFunction().getContext(), Val.getValue().sextOrTrunc(Ty.getSizeInBits())); return buildInstr(TargetOpcode::G_CONSTANT).addDef(Res).addCImm(NewVal); } -MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res, - int64_t Val) { - auto IntN = 
IntegerType::get(MF->getFunction().getContext(), - MRI->getType(Res).getSizeInBits()); +MachineInstrBuilder MachineIRBuilderBase::buildConstant(unsigned Res, + int64_t Val) { + auto IntN = IntegerType::get(getMF().getFunction().getContext(), + getMRI()->getType(Res).getSizeInBits()); ConstantInt *CI = ConstantInt::get(IntN, Val, true); return buildConstant(Res, *CI); } -MachineInstrBuilder MachineIRBuilder::buildFConstant(unsigned Res, - const ConstantFP &Val) { - assert(MRI->getType(Res).isScalar() && "invalid operand type"); +MachineInstrBuilder +MachineIRBuilderBase::buildFConstant(unsigned Res, const ConstantFP &Val) { + assert(getMRI()->getType(Res).isScalar() && "invalid operand type"); return buildInstr(TargetOpcode::G_FCONSTANT).addDef(Res).addFPImm(&Val); } -MachineInstrBuilder MachineIRBuilder::buildBrCond(unsigned Tst, - MachineBasicBlock &Dest) { - assert(MRI->getType(Tst).isScalar() && "invalid operand type"); +MachineInstrBuilder MachineIRBuilderBase::buildFConstant(unsigned Res, + double Val) { + LLT DstTy = getMRI()->getType(Res); + auto &Ctx = getMF().getFunction().getContext(); + auto *CFP = + ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getSizeInBits())); + return buildFConstant(Res, *CFP); +} + +MachineInstrBuilder MachineIRBuilderBase::buildBrCond(unsigned Tst, + MachineBasicBlock &Dest) { + assert(getMRI()->getType(Tst).isScalar() && "invalid operand type"); return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest); } -MachineInstrBuilder MachineIRBuilder::buildLoad(unsigned Res, unsigned Addr, - MachineMemOperand &MMO) { - assert(MRI->getType(Res).isValid() && "invalid operand type"); - assert(MRI->getType(Addr).isPointer() && "invalid operand type"); +MachineInstrBuilder MachineIRBuilderBase::buildLoad(unsigned Res, unsigned Addr, + MachineMemOperand &MMO) { + return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO); +} + +MachineInstrBuilder +MachineIRBuilderBase::buildLoadInstr(unsigned Opcode, unsigned Res, + unsigned Addr, MachineMemOperand &MMO) { + assert(getMRI()->getType(Res).isValid() && "invalid operand type"); + assert(getMRI()->getType(Addr).isPointer() && "invalid operand type"); - return buildInstr(TargetOpcode::G_LOAD) + return buildInstr(Opcode) .addDef(Res) .addUse(Addr) .addMemOperand(&MMO); } -MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr, - MachineMemOperand &MMO) { - assert(MRI->getType(Val).isValid() && "invalid operand type"); - assert(MRI->getType(Addr).isPointer() && "invalid operand type"); +MachineInstrBuilder MachineIRBuilderBase::buildStore(unsigned Val, + unsigned Addr, + MachineMemOperand &MMO) { + assert(getMRI()->getType(Val).isValid() && "invalid operand type"); + assert(getMRI()->getType(Addr).isPointer() && "invalid operand type"); return buildInstr(TargetOpcode::G_STORE) .addUse(Val) @@ -313,15 +309,16 @@ MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr, .addMemOperand(&MMO); } -MachineInstrBuilder MachineIRBuilder::buildUAdde(unsigned Res, - unsigned CarryOut, - unsigned Op0, unsigned Op1, - unsigned CarryIn) { - assert(MRI->getType(Res).isScalar() && "invalid operand type"); - assert(MRI->getType(Res) == MRI->getType(Op0) && - MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); - assert(MRI->getType(CarryOut).isScalar() && "invalid operand type"); - assert(MRI->getType(CarryOut) == MRI->getType(CarryIn) && "type mismatch"); +MachineInstrBuilder MachineIRBuilderBase::buildUAdde(unsigned Res, + unsigned CarryOut, + unsigned Op0, unsigned Op1, + 
unsigned CarryIn) { + assert(getMRI()->getType(Res).isScalar() && "invalid operand type"); + assert(getMRI()->getType(Res) == getMRI()->getType(Op0) && + getMRI()->getType(Res) == getMRI()->getType(Op1) && "type mismatch"); + assert(getMRI()->getType(CarryOut).isScalar() && "invalid operand type"); + assert(getMRI()->getType(CarryOut) == getMRI()->getType(CarryIn) && + "type mismatch"); return buildInstr(TargetOpcode::G_UADDE) .addDef(Res) @@ -331,58 +328,64 @@ MachineInstrBuilder MachineIRBuilder::buildUAdde(unsigned Res, .addUse(CarryIn); } -MachineInstrBuilder MachineIRBuilder::buildAnyExt(unsigned Res, unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildAnyExt(unsigned Res, + unsigned Op) { validateTruncExt(Res, Op, true); return buildInstr(TargetOpcode::G_ANYEXT).addDef(Res).addUse(Op); } -MachineInstrBuilder MachineIRBuilder::buildSExt(unsigned Res, unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildSExt(unsigned Res, unsigned Op) { validateTruncExt(Res, Op, true); return buildInstr(TargetOpcode::G_SEXT).addDef(Res).addUse(Op); } -MachineInstrBuilder MachineIRBuilder::buildZExt(unsigned Res, unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildZExt(unsigned Res, unsigned Op) { validateTruncExt(Res, Op, true); return buildInstr(TargetOpcode::G_ZEXT).addDef(Res).addUse(Op); } -MachineInstrBuilder -MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc, unsigned Res, unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildExtOrTrunc(unsigned ExtOpc, + unsigned Res, + unsigned Op) { assert((TargetOpcode::G_ANYEXT == ExtOpc || TargetOpcode::G_ZEXT == ExtOpc || TargetOpcode::G_SEXT == ExtOpc) && "Expecting Extending Opc"); - assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()); - assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar()); + assert(getMRI()->getType(Res).isScalar() || + getMRI()->getType(Res).isVector()); + assert(getMRI()->getType(Res).isScalar() == getMRI()->getType(Op).isScalar()); unsigned Opcode = TargetOpcode::COPY; - if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits()) + if (getMRI()->getType(Res).getSizeInBits() > + getMRI()->getType(Op).getSizeInBits()) Opcode = ExtOpc; - else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits()) + else if (getMRI()->getType(Res).getSizeInBits() < + getMRI()->getType(Op).getSizeInBits()) Opcode = TargetOpcode::G_TRUNC; else - assert(MRI->getType(Res) == MRI->getType(Op)); + assert(getMRI()->getType(Res) == getMRI()->getType(Op)); return buildInstr(Opcode).addDef(Res).addUse(Op); } -MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res, - unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildSExtOrTrunc(unsigned Res, + unsigned Op) { return buildExtOrTrunc(TargetOpcode::G_SEXT, Res, Op); } -MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(unsigned Res, - unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildZExtOrTrunc(unsigned Res, + unsigned Op) { return buildExtOrTrunc(TargetOpcode::G_ZEXT, Res, Op); } -MachineInstrBuilder MachineIRBuilder::buildAnyExtOrTrunc(unsigned Res, - unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildAnyExtOrTrunc(unsigned Res, + unsigned Op) { return buildExtOrTrunc(TargetOpcode::G_ANYEXT, Res, Op); } -MachineInstrBuilder MachineIRBuilder::buildCast(unsigned Dst, unsigned Src) { - LLT SrcTy = MRI->getType(Src); - LLT DstTy = MRI->getType(Dst); +MachineInstrBuilder MachineIRBuilderBase::buildCast(unsigned Dst, + unsigned Src) { + LLT SrcTy = 
getMRI()->getType(Src); + LLT DstTy = getMRI()->getType(Dst); if (SrcTy == DstTy) return buildCopy(Dst, Src); @@ -399,17 +402,18 @@ MachineInstrBuilder MachineIRBuilder::buildCast(unsigned Dst, unsigned Src) { return buildInstr(Opcode).addDef(Dst).addUse(Src); } -MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src, - uint64_t Index) { +MachineInstrBuilder +MachineIRBuilderBase::buildExtract(unsigned Res, unsigned Src, uint64_t Index) { #ifndef NDEBUG - assert(MRI->getType(Src).isValid() && "invalid operand type"); - assert(MRI->getType(Res).isValid() && "invalid operand type"); - assert(Index + MRI->getType(Res).getSizeInBits() <= - MRI->getType(Src).getSizeInBits() && + assert(getMRI()->getType(Src).isValid() && "invalid operand type"); + assert(getMRI()->getType(Res).isValid() && "invalid operand type"); + assert(Index + getMRI()->getType(Res).getSizeInBits() <= + getMRI()->getType(Src).getSizeInBits() && "extracting off end of register"); #endif - if (MRI->getType(Res).getSizeInBits() == MRI->getType(Src).getSizeInBits()) { + if (getMRI()->getType(Res).getSizeInBits() == + getMRI()->getType(Src).getSizeInBits()) { assert(Index == 0 && "insertion past the end of a register"); return buildCast(Res, Src); } @@ -420,25 +424,25 @@ MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src, .addImm(Index); } -void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops, - ArrayRef<uint64_t> Indices) { +void MachineIRBuilderBase::buildSequence(unsigned Res, ArrayRef<unsigned> Ops, + ArrayRef<uint64_t> Indices) { #ifndef NDEBUG assert(Ops.size() == Indices.size() && "incompatible args"); assert(!Ops.empty() && "invalid trivial sequence"); assert(std::is_sorted(Indices.begin(), Indices.end()) && "sequence offsets must be in ascending order"); - assert(MRI->getType(Res).isValid() && "invalid operand type"); + assert(getMRI()->getType(Res).isValid() && "invalid operand type"); for (auto Op : Ops) - assert(MRI->getType(Op).isValid() && "invalid operand type"); + assert(getMRI()->getType(Op).isValid() && "invalid operand type"); #endif - LLT ResTy = MRI->getType(Res); - LLT OpTy = MRI->getType(Ops[0]); + LLT ResTy = getMRI()->getType(Res); + LLT OpTy = getMRI()->getType(Ops[0]); unsigned OpSize = OpTy.getSizeInBits(); bool MaybeMerge = true; for (unsigned i = 0; i < Ops.size(); ++i) { - if (MRI->getType(Ops[i]) != OpTy || Indices[i] != i * OpSize) { + if (getMRI()->getType(Ops[i]) != OpTy || Indices[i] != i * OpSize) { MaybeMerge = false; break; } @@ -449,31 +453,32 @@ void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops, return; } - unsigned ResIn = MRI->createGenericVirtualRegister(ResTy); + unsigned ResIn = getMRI()->createGenericVirtualRegister(ResTy); buildUndef(ResIn); for (unsigned i = 0; i < Ops.size(); ++i) { - unsigned ResOut = - i + 1 == Ops.size() ? Res : MRI->createGenericVirtualRegister(ResTy); + unsigned ResOut = i + 1 == Ops.size() + ? 
Res + : getMRI()->createGenericVirtualRegister(ResTy); buildInsert(ResOut, ResIn, Ops[i], Indices[i]); ResIn = ResOut; } } -MachineInstrBuilder MachineIRBuilder::buildUndef(unsigned Res) { +MachineInstrBuilder MachineIRBuilderBase::buildUndef(unsigned Res) { return buildInstr(TargetOpcode::G_IMPLICIT_DEF).addDef(Res); } -MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res, - ArrayRef<unsigned> Ops) { +MachineInstrBuilder MachineIRBuilderBase::buildMerge(unsigned Res, + ArrayRef<unsigned> Ops) { #ifndef NDEBUG assert(!Ops.empty() && "invalid trivial sequence"); - LLT Ty = MRI->getType(Ops[0]); + LLT Ty = getMRI()->getType(Ops[0]); for (auto Reg : Ops) - assert(MRI->getType(Reg) == Ty && "type mismatch in input list"); - assert(Ops.size() * MRI->getType(Ops[0]).getSizeInBits() == - MRI->getType(Res).getSizeInBits() && + assert(getMRI()->getType(Reg) == Ty && "type mismatch in input list"); + assert(Ops.size() * getMRI()->getType(Ops[0]).getSizeInBits() == + getMRI()->getType(Res).getSizeInBits() && "input operands do not cover output register"); #endif @@ -487,16 +492,16 @@ MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res, return MIB; } -MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res, - unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildUnmerge(ArrayRef<unsigned> Res, + unsigned Op) { #ifndef NDEBUG assert(!Res.empty() && "invalid trivial sequence"); - LLT Ty = MRI->getType(Res[0]); + LLT Ty = getMRI()->getType(Res[0]); for (auto Reg : Res) - assert(MRI->getType(Reg) == Ty && "type mismatch in input list"); - assert(Res.size() * MRI->getType(Res[0]).getSizeInBits() == - MRI->getType(Op).getSizeInBits() && + assert(getMRI()->getType(Reg) == Ty && "type mismatch in input list"); + assert(Res.size() * getMRI()->getType(Res[0]).getSizeInBits() == + getMRI()->getType(Op).getSizeInBits() && "input operands do not cover output register"); #endif @@ -507,13 +512,15 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res, return MIB; } -MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src, - unsigned Op, unsigned Index) { - assert(Index + MRI->getType(Op).getSizeInBits() <= - MRI->getType(Res).getSizeInBits() && +MachineInstrBuilder MachineIRBuilderBase::buildInsert(unsigned Res, + unsigned Src, unsigned Op, + unsigned Index) { + assert(Index + getMRI()->getType(Op).getSizeInBits() <= + getMRI()->getType(Res).getSizeInBits() && "insertion past the end of a register"); - if (MRI->getType(Res).getSizeInBits() == MRI->getType(Op).getSizeInBits()) { + if (getMRI()->getType(Res).getSizeInBits() == + getMRI()->getType(Op).getSizeInBits()) { return buildCast(Res, Op); } @@ -524,9 +531,9 @@ MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src, .addImm(Index); } -MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID, - unsigned Res, - bool HasSideEffects) { +MachineInstrBuilder MachineIRBuilderBase::buildIntrinsic(Intrinsic::ID ID, + unsigned Res, + bool HasSideEffects) { auto MIB = buildInstr(HasSideEffects ? 
TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS : TargetOpcode::G_INTRINSIC); @@ -536,28 +543,30 @@ MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID, return MIB; } -MachineInstrBuilder MachineIRBuilder::buildTrunc(unsigned Res, unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildTrunc(unsigned Res, + unsigned Op) { validateTruncExt(Res, Op, false); return buildInstr(TargetOpcode::G_TRUNC).addDef(Res).addUse(Op); } -MachineInstrBuilder MachineIRBuilder::buildFPTrunc(unsigned Res, unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildFPTrunc(unsigned Res, + unsigned Op) { validateTruncExt(Res, Op, false); return buildInstr(TargetOpcode::G_FPTRUNC).addDef(Res).addUse(Op); } -MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred, - unsigned Res, unsigned Op0, - unsigned Op1) { +MachineInstrBuilder MachineIRBuilderBase::buildICmp(CmpInst::Predicate Pred, + unsigned Res, unsigned Op0, + unsigned Op1) { #ifndef NDEBUG - assert(MRI->getType(Op0) == MRI->getType(Op0) && "type mismatch"); + assert(getMRI()->getType(Op0) == getMRI()->getType(Op0) && "type mismatch"); assert(CmpInst::isIntPredicate(Pred) && "invalid predicate"); - if (MRI->getType(Op0).isScalar() || MRI->getType(Op0).isPointer()) - assert(MRI->getType(Res).isScalar() && "type mismatch"); + if (getMRI()->getType(Op0).isScalar() || getMRI()->getType(Op0).isPointer()) + assert(getMRI()->getType(Res).isScalar() && "type mismatch"); else - assert(MRI->getType(Res).isVector() && - MRI->getType(Res).getNumElements() == - MRI->getType(Op0).getNumElements() && + assert(getMRI()->getType(Res).isVector() && + getMRI()->getType(Res).getNumElements() == + getMRI()->getType(Op0).getNumElements() && "type mismatch"); #endif @@ -568,20 +577,21 @@ MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred, .addUse(Op1); } -MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred, - unsigned Res, unsigned Op0, - unsigned Op1) { +MachineInstrBuilder MachineIRBuilderBase::buildFCmp(CmpInst::Predicate Pred, + unsigned Res, unsigned Op0, + unsigned Op1) { #ifndef NDEBUG - assert((MRI->getType(Op0).isScalar() || MRI->getType(Op0).isVector()) && + assert((getMRI()->getType(Op0).isScalar() || + getMRI()->getType(Op0).isVector()) && "invalid operand type"); - assert(MRI->getType(Op0) == MRI->getType(Op1) && "type mismatch"); + assert(getMRI()->getType(Op0) == getMRI()->getType(Op1) && "type mismatch"); assert(CmpInst::isFPPredicate(Pred) && "invalid predicate"); - if (MRI->getType(Op0).isScalar()) - assert(MRI->getType(Res).isScalar() && "type mismatch"); + if (getMRI()->getType(Op0).isScalar()) + assert(getMRI()->getType(Res).isScalar() && "type mismatch"); else - assert(MRI->getType(Res).isVector() && - MRI->getType(Res).getNumElements() == - MRI->getType(Op0).getNumElements() && + assert(getMRI()->getType(Res).isVector() && + getMRI()->getType(Res).getNumElements() == + getMRI()->getType(Op0).getNumElements() && "type mismatch"); #endif @@ -592,21 +602,23 @@ MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred, .addUse(Op1); } -MachineInstrBuilder MachineIRBuilder::buildSelect(unsigned Res, unsigned Tst, - unsigned Op0, unsigned Op1) { +MachineInstrBuilder MachineIRBuilderBase::buildSelect(unsigned Res, + unsigned Tst, + unsigned Op0, + unsigned Op1) { #ifndef NDEBUG - LLT ResTy = MRI->getType(Res); + LLT ResTy = getMRI()->getType(Res); assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) && "invalid operand type"); - assert(ResTy == 
MRI->getType(Op0) && ResTy == MRI->getType(Op1) && + assert(ResTy == getMRI()->getType(Op0) && ResTy == getMRI()->getType(Op1) && "type mismatch"); if (ResTy.isScalar() || ResTy.isPointer()) - assert(MRI->getType(Tst).isScalar() && "type mismatch"); + assert(getMRI()->getType(Tst).isScalar() && "type mismatch"); else - assert((MRI->getType(Tst).isScalar() || - (MRI->getType(Tst).isVector() && - MRI->getType(Tst).getNumElements() == - MRI->getType(Op0).getNumElements())) && + assert((getMRI()->getType(Tst).isScalar() || + (getMRI()->getType(Tst).isVector() && + getMRI()->getType(Tst).getNumElements() == + getMRI()->getType(Op0).getNumElements())) && "type mismatch"); #endif @@ -617,15 +629,14 @@ MachineInstrBuilder MachineIRBuilder::buildSelect(unsigned Res, unsigned Tst, .addUse(Op1); } -MachineInstrBuilder MachineIRBuilder::buildInsertVectorElement(unsigned Res, - unsigned Val, - unsigned Elt, - unsigned Idx) { +MachineInstrBuilder +MachineIRBuilderBase::buildInsertVectorElement(unsigned Res, unsigned Val, + unsigned Elt, unsigned Idx) { #ifndef NDEBUG - LLT ResTy = MRI->getType(Res); - LLT ValTy = MRI->getType(Val); - LLT EltTy = MRI->getType(Elt); - LLT IdxTy = MRI->getType(Idx); + LLT ResTy = getMRI()->getType(Res); + LLT ValTy = getMRI()->getType(Val); + LLT EltTy = getMRI()->getType(Elt); + LLT IdxTy = getMRI()->getType(Idx); assert(ResTy.isVector() && ValTy.isVector() && "invalid operand type"); assert(IdxTy.isScalar() && "invalid operand type"); assert(ResTy.getNumElements() == ValTy.getNumElements() && "type mismatch"); @@ -639,13 +650,13 @@ MachineInstrBuilder MachineIRBuilder::buildInsertVectorElement(unsigned Res, .addUse(Idx); } -MachineInstrBuilder MachineIRBuilder::buildExtractVectorElement(unsigned Res, - unsigned Val, - unsigned Idx) { +MachineInstrBuilder +MachineIRBuilderBase::buildExtractVectorElement(unsigned Res, unsigned Val, + unsigned Idx) { #ifndef NDEBUG - LLT ResTy = MRI->getType(Res); - LLT ValTy = MRI->getType(Val); - LLT IdxTy = MRI->getType(Idx); + LLT ResTy = getMRI()->getType(Res); + LLT ValTy = getMRI()->getType(Val); + LLT IdxTy = getMRI()->getType(Idx); assert(ValTy.isVector() && "invalid operand type"); assert((ResTy.isScalar() || ResTy.isPointer()) && "invalid operand type"); assert(IdxTy.isScalar() && "invalid operand type"); @@ -658,15 +669,42 @@ MachineInstrBuilder MachineIRBuilder::buildExtractVectorElement(unsigned Res, .addUse(Idx); } +MachineInstrBuilder MachineIRBuilderBase::buildAtomicCmpXchgWithSuccess( + unsigned OldValRes, unsigned SuccessRes, unsigned Addr, unsigned CmpVal, + unsigned NewVal, MachineMemOperand &MMO) { +#ifndef NDEBUG + LLT OldValResTy = getMRI()->getType(OldValRes); + LLT SuccessResTy = getMRI()->getType(SuccessRes); + LLT AddrTy = getMRI()->getType(Addr); + LLT CmpValTy = getMRI()->getType(CmpVal); + LLT NewValTy = getMRI()->getType(NewVal); + assert(OldValResTy.isScalar() && "invalid operand type"); + assert(SuccessResTy.isScalar() && "invalid operand type"); + assert(AddrTy.isPointer() && "invalid operand type"); + assert(CmpValTy.isValid() && "invalid operand type"); + assert(NewValTy.isValid() && "invalid operand type"); + assert(OldValResTy == CmpValTy && "type mismatch"); + assert(OldValResTy == NewValTy && "type mismatch"); +#endif + + return buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) + .addDef(OldValRes) + .addDef(SuccessRes) + .addUse(Addr) + .addUse(CmpVal) + .addUse(NewVal) + .addMemOperand(&MMO); +} + MachineInstrBuilder -MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, - 
unsigned CmpVal, unsigned NewVal, - MachineMemOperand &MMO) { +MachineIRBuilderBase::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, + unsigned CmpVal, unsigned NewVal, + MachineMemOperand &MMO) { #ifndef NDEBUG - LLT OldValResTy = MRI->getType(OldValRes); - LLT AddrTy = MRI->getType(Addr); - LLT CmpValTy = MRI->getType(CmpVal); - LLT NewValTy = MRI->getType(NewVal); + LLT OldValResTy = getMRI()->getType(OldValRes); + LLT AddrTy = getMRI()->getType(Addr); + LLT CmpValTy = getMRI()->getType(CmpVal); + LLT NewValTy = getMRI()->getType(NewVal); assert(OldValResTy.isScalar() && "invalid operand type"); assert(AddrTy.isPointer() && "invalid operand type"); assert(CmpValTy.isValid() && "invalid operand type"); @@ -683,14 +721,102 @@ MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, .addMemOperand(&MMO); } -void MachineIRBuilder::validateTruncExt(unsigned Dst, unsigned Src, - bool IsExtend) { +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMW(unsigned Opcode, unsigned OldValRes, + unsigned Addr, unsigned Val, + MachineMemOperand &MMO) { +#ifndef NDEBUG + LLT OldValResTy = getMRI()->getType(OldValRes); + LLT AddrTy = getMRI()->getType(Addr); + LLT ValTy = getMRI()->getType(Val); + assert(OldValResTy.isScalar() && "invalid operand type"); + assert(AddrTy.isPointer() && "invalid operand type"); + assert(ValTy.isValid() && "invalid operand type"); + assert(OldValResTy == ValTy && "type mismatch"); +#endif + + return buildInstr(Opcode) + .addDef(OldValRes) + .addUse(Addr) + .addUse(Val) + .addMemOperand(&MMO); +} + +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XCHG, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_ADD, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWSub(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_SUB, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_AND, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWNand(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_NAND, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWOr(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_OR, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWXor(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XOR, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWMax(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MAX, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWMin(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return 
buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MIN, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMAX, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMIN, OldValRes, Addr, Val, + MMO); +} + +void MachineIRBuilderBase::validateTruncExt(unsigned Dst, unsigned Src, + bool IsExtend) { #ifndef NDEBUG - LLT SrcTy = MRI->getType(Src); - LLT DstTy = MRI->getType(Dst); + LLT SrcTy = getMRI()->getType(Src); + LLT DstTy = getMRI()->getType(Dst); if (DstTy.isVector()) { - assert(SrcTy.isVector() && "mismatched cast between vecot and non-vector"); + assert(SrcTy.isVector() && "mismatched cast between vector and non-vector"); assert(SrcTy.getNumElements() == DstTy.getNumElements() && "different number of elements in a trunc/ext"); } else diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 006c9ea23034..9e2d48d1dc42 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/Pass.h" @@ -75,7 +76,7 @@ RegBankSelect::RegBankSelect(Mode RunningMode) if (RegBankSelectMode.getNumOccurrences() != 0) { OptMode = RegBankSelectMode; if (RegBankSelectMode != RunningMode) - DEBUG(dbgs() << "RegBankSelect mode overrided by command line\n"); + LLVM_DEBUG(dbgs() << "RegBankSelect mode overrided by command line\n"); } } @@ -104,6 +105,7 @@ void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineBranchProbabilityInfo>(); } AU.addRequired<TargetPassConfig>(); + getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -122,11 +124,11 @@ bool RegBankSelect::assignmentMatch( // Reg is free of assignment, a simple assignment will make the // register bank to match. OnlyAssign = CurRegBank == nullptr; - DEBUG(dbgs() << "Does assignment already match: "; - if (CurRegBank) dbgs() << *CurRegBank; else dbgs() << "none"; - dbgs() << " against "; - assert(DesiredRegBrank && "The mapping must be valid"); - dbgs() << *DesiredRegBrank << '\n';); + LLVM_DEBUG(dbgs() << "Does assignment already match: "; + if (CurRegBank) dbgs() << *CurRegBank; else dbgs() << "none"; + dbgs() << " against "; + assert(DesiredRegBrank && "The mapping must be valid"); + dbgs() << *DesiredRegBrank << '\n';); return CurRegBank == DesiredRegBrank; } @@ -159,8 +161,8 @@ bool RegBankSelect::repairReg( // same types because the type is a placeholder when this function is called. MachineInstr *MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src); - DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst) - << '\n'); + LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst) + << '\n'); // TODO: // Check if MI is legal. if not, we need to legalize all the // instructions we are going to insert. 
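An aside on the MachineIRBuilderBase hunks above: they add G_ATOMIC_CMPXCHG_WITH_SUCCESS plus a family of G_ATOMICRMW_* convenience builders. The sketch below is not part of the diff; it only illustrates how a GlobalISel translator might drive the new builders, with the builder object, virtual registers and memory operand assumed to have been created by the caller.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"

using namespace llvm;

// Illustrative only: all registers are pre-created vregs and MMO describes
// the atomic access; the calls mirror the signatures added in the hunk above.
static void emitAtomicOps(MachineIRBuilder &MIRBuilder, unsigned OldVal,
                          unsigned Success, unsigned Addr, unsigned CmpVal,
                          unsigned NewVal, unsigned RMWOld, unsigned Val,
                          MachineMemOperand &MMO) {
  // Old value plus an i1 success flag, e.g. when lowering an IR cmpxchg.
  MIRBuilder.buildAtomicCmpXchgWithSuccess(OldVal, Success, Addr, CmpVal,
                                           NewVal, MMO);
  // One of the new read-modify-write builders; the asserts above require
  // RMWOld and Val to share an LLT and Addr to have a pointer type.
  MIRBuilder.buildAtomicRMWAdd(RMWOld, Addr, Val, MMO);
}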
@@ -245,7 +247,7 @@ const RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( MappingCost CurCost = computeMapping(MI, *CurMapping, LocalRepairPts, &Cost); if (CurCost < Cost) { - DEBUG(dbgs() << "New best: " << CurCost << '\n'); + LLVM_DEBUG(dbgs() << "New best: " << CurCost << '\n'); Cost = CurCost; BestMapping = CurMapping; RepairPts.clear(); @@ -397,11 +399,11 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent()) : 1); bool Saturated = Cost.addLocalCost(InstrMapping.getCost()); assert(!Saturated && "Possible mapping saturated the cost"); - DEBUG(dbgs() << "Evaluating mapping cost for: " << MI); - DEBUG(dbgs() << "With: " << InstrMapping << '\n'); + LLVM_DEBUG(dbgs() << "Evaluating mapping cost for: " << MI); + LLVM_DEBUG(dbgs() << "With: " << InstrMapping << '\n'); RepairPts.clear(); if (BestCost && Cost > *BestCost) { - DEBUG(dbgs() << "Mapping is too expensive from the start\n"); + LLVM_DEBUG(dbgs() << "Mapping is too expensive from the start\n"); return Cost; } @@ -417,17 +419,17 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( unsigned Reg = MO.getReg(); if (!Reg) continue; - DEBUG(dbgs() << "Opd" << OpIdx << '\n'); + LLVM_DEBUG(dbgs() << "Opd" << OpIdx << '\n'); const RegisterBankInfo::ValueMapping &ValMapping = InstrMapping.getOperandMapping(OpIdx); // If Reg is already properly mapped, this is free. bool Assign; if (assignmentMatch(Reg, ValMapping, Assign)) { - DEBUG(dbgs() << "=> is free (match).\n"); + LLVM_DEBUG(dbgs() << "=> is free (match).\n"); continue; } if (Assign) { - DEBUG(dbgs() << "=> is free (simple assignment).\n"); + LLVM_DEBUG(dbgs() << "=> is free (simple assignment).\n"); RepairPts.emplace_back(RepairingPlacement(MI, OpIdx, *TRI, *this, RepairingPlacement::Reassign)); continue; @@ -446,7 +448,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( // Check that the materialization of the repairing is possible. if (!RepairPt.canMaterialize()) { - DEBUG(dbgs() << "Mapping involves impossible repairing\n"); + LLVM_DEBUG(dbgs() << "Mapping involves impossible repairing\n"); return MappingCost::ImpossibleCost(); } @@ -473,7 +475,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( // This is an impossible to repair cost. if (RepairCost == std::numeric_limits<unsigned>::max()) - continue; + return MappingCost::ImpossibleCost(); // Bias used for splitting: 5%. const uint64_t PercentageForBias = 5; @@ -509,7 +511,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( // Stop looking into what it takes to repair, this is already // too expensive. if (BestCost && Cost > *BestCost) { - DEBUG(dbgs() << "Mapping is too expensive, stop processing\n"); + LLVM_DEBUG(dbgs() << "Mapping is too expensive, stop processing\n"); return Cost; } @@ -519,7 +521,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( break; } } - DEBUG(dbgs() << "Total cost is: " << Cost << "\n"); + LLVM_DEBUG(dbgs() << "Total cost is: " << Cost << "\n"); return Cost; } @@ -559,14 +561,14 @@ bool RegBankSelect::applyMapping( } // Second, rewrite the instruction. - DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n'); + LLVM_DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n'); RBI->applyMapping(OpdMapper); return true; } bool RegBankSelect::assignInstr(MachineInstr &MI) { - DEBUG(dbgs() << "Assign: " << MI); + LLVM_DEBUG(dbgs() << "Assign: " << MI); // Remember the repairing placement for all the operands. 
SmallVector<RepairingPlacement, 4> RepairPts; @@ -587,7 +589,7 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) { // Make sure the mapping is valid for MI. assert(BestMapping->verify(MI) && "Invalid instruction mapping"); - DEBUG(dbgs() << "Best Mapping: " << *BestMapping << '\n'); + LLVM_DEBUG(dbgs() << "Best Mapping: " << *BestMapping << '\n'); // After this call, MI may not be valid anymore. // Do not use it. @@ -600,7 +602,7 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { MachineFunctionProperties::Property::FailedISel)) return false; - DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n'); + LLVM_DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n'); const Function &F = MF.getFunction(); Mode SaveOptMode = OptMode; if (F.hasFnAttribute(Attribute::OptimizeNone)) @@ -610,20 +612,13 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { #ifndef NDEBUG // Check that our input is fully legal: we require the function to have the // Legalized property, so it should be. - // FIXME: This should be in the MachineVerifier, but it can't use the - // LegalizerInfo as it's currently in the separate GlobalISel library. - const MachineRegisterInfo &MRI = MF.getRegInfo(); - if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) { - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) { - reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect", - "instruction is not legal", MI); - return false; - } - } + // FIXME: This should be in the MachineVerifier. + if (!DisableGISelLegalityCheck) + if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) { + reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect", + "instruction is not legal", *MI); + return false; } - } #endif // Walk the function and assign register banks to all operands. 
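Most of the churn in RegBankSelect.cpp above, and in nearly every file that follows, is the mechanical rename of the DEBUG macro to LLVM_DEBUG. As a reminder of what the macro does, here is a small self-contained model of the pattern; it is deliberately simplified and is not the real llvm/Support/Debug.h.

#include <cstdio>

// Simplified stand-in for LLVM's debug macro (illustrative only).
#ifndef NDEBUG
static bool DebugFlag = true;                                  // -debug
static bool isCurrentDebugType(const char *) { return true; }  // -debug-only=...
#define LLVM_DEBUG(X)                                          \
  do {                                                         \
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {         \
      X;                                                       \
    }                                                          \
  } while (false)
#else
#define LLVM_DEBUG(X) do { } while (false)  // compiled out in release builds
#endif

#define DEBUG_TYPE "regbankselect"

int main() {
  unsigned Cost = 42;
  // The argument is an arbitrary statement, so streaming chains and even the
  // wrapped if/for constructs seen in the hunks above are legal.
  LLVM_DEBUG(std::printf("New best: %u\n", Cost));
  return 0;
}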
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp index 4d3ae69d3a9d..16f67a217ce1 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp @@ -12,6 +12,7 @@ #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Config/llvm-config.h" #define DEBUG_TYPE "registerbank" diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index b3d9209ae6eb..dd15567ef1c1 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -72,7 +73,7 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const { const RegisterBank &RegBank = getRegBank(Idx); assert(Idx == RegBank.getID() && "ID does not match the index in the array"); - DEBUG(dbgs() << "Verify " << RegBank << '\n'); + LLVM_DEBUG(dbgs() << "Verify " << RegBank << '\n'); assert(RegBank.verify(TRI) && "RegBank is invalid"); } #endif // NDEBUG @@ -403,18 +404,18 @@ RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const { void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { MachineInstr &MI = OpdMapper.getMI(); MachineRegisterInfo &MRI = OpdMapper.getMRI(); - DEBUG(dbgs() << "Applying default-like mapping\n"); + LLVM_DEBUG(dbgs() << "Applying default-like mapping\n"); for (unsigned OpIdx = 0, EndIdx = OpdMapper.getInstrMapping().getNumOperands(); OpIdx != EndIdx; ++OpIdx) { - DEBUG(dbgs() << "OpIdx " << OpIdx); + LLVM_DEBUG(dbgs() << "OpIdx " << OpIdx); MachineOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - DEBUG(dbgs() << " is not a register, nothing to be done\n"); + LLVM_DEBUG(dbgs() << " is not a register, nothing to be done\n"); continue; } if (!MO.getReg()) { - DEBUG(dbgs() << " is %%noreg, nothing to be done\n"); + LLVM_DEBUG(dbgs() << " is %%noreg, nothing to be done\n"); continue; } assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns != @@ -426,14 +427,14 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs = OpdMapper.getVRegs(OpIdx); if (NewRegs.begin() == NewRegs.end()) { - DEBUG(dbgs() << " has not been repaired, nothing to be done\n"); + LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n"); continue; } unsigned OrigReg = MO.getReg(); unsigned NewReg = *NewRegs.begin(); - DEBUG(dbgs() << " changed, replace " << printReg(OrigReg, nullptr)); + LLVM_DEBUG(dbgs() << " changed, replace " << printReg(OrigReg, nullptr)); MO.setReg(NewReg); - DEBUG(dbgs() << " with " << printReg(NewReg, nullptr)); + LLVM_DEBUG(dbgs() << " with " << printReg(NewReg, nullptr)); // The OperandsMapper creates plain scalar, we may have to fix that. // Check if the types match and if not, fix that. 
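A note on the #include "llvm/Config/llvm-config.h" additions here (RegisterBank.cpp, RegisterBankInfo.cpp) and later in InlineSpiller.cpp and LatencyPriorityQueue.cpp: llvm-config.h is the generated header that defines LLVM_ENABLE_DUMP, which these files presumably test when guarding their dump()/print() helpers, so the include has to be explicit rather than picked up transitively. A schematic of that guard, using a made-up class rather than code from any of these files:

#include "llvm/Config/llvm-config.h"  // provides LLVM_ENABLE_DUMP when enabled
#include "llvm/Support/Compiler.h"    // LLVM_DUMP_METHOD
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

struct SomePassState {
  int Cost = 0;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // Without the llvm-config.h include, LLVM_ENABLE_DUMP could be silently
  // undefined here and the dump() helper would be compiled out of builds
  // that asked for dump support.
  LLVM_DUMP_METHOD void dump() const { llvm::dbgs() << "cost=" << Cost << '\n'; }
#endif
};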
@@ -447,35 +448,27 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { assert(OrigTy.getSizeInBits() <= NewTy.getSizeInBits() && "Types with difference size cannot be handled by the default " "mapping"); - DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to " - << OrigTy); + LLVM_DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to " + << OrigTy); MRI.setType(NewReg, OrigTy); } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); } } unsigned RegisterBankInfo::getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { - const TargetRegisterClass *RC = nullptr; if (TargetRegisterInfo::isPhysicalRegister(Reg)) { // The size is not directly available for physical registers. // Instead, we need to access a register class that contains Reg and // get the size of that register class. - RC = &getMinimalPhysRegClass(Reg, TRI); - } else { - LLT Ty = MRI.getType(Reg); - unsigned RegSize = Ty.isValid() ? Ty.getSizeInBits() : 0; - // If Reg is not a generic register, query the register class to - // get its size. - if (RegSize) - return RegSize; - // Since Reg is not a generic register, it must have a register class. - RC = MRI.getRegClass(Reg); + // Because this is expensive, we'll cache the register class by calling + auto *RC = &getMinimalPhysRegClass(Reg, TRI); + assert(RC && "Expecting Register class"); + return TRI.getRegSizeInBits(*RC); } - assert(RC && "Unable to deduce the register class"); - return TRI.getRegSizeInBits(*RC); + return TRI.getRegSizeInBits(Reg, MRI); } //------------------------------------------------------------------------------ diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp index ef990b49aceb..1a5f88743d5f 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -11,12 +11,14 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -42,20 +44,94 @@ unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI, return Reg; } - unsigned llvm::constrainOperandRegClass( const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, - unsigned Reg, unsigned OpIdx) { + const MachineOperand &RegMO, unsigned OpIdx) { + unsigned Reg = RegMO.getReg(); // Assume physical registers are properly constrained. assert(TargetRegisterInfo::isVirtualRegister(Reg) && "PhysReg not implemented"); const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF); + // Some of the target independent instructions, like COPY, may not impose any + // register class constraints on some of their operands: If it's a use, we can + // skip constraining as the instruction defining the register would constrain + // it. 
+ + // We can't constrain unallocatable register classes, because we can't create + // virtual registers for these classes, so we need to let targets handled this + // case. + if (RegClass && !RegClass->isAllocatable()) + RegClass = TRI.getConstrainedRegClassForOperand(RegMO, MRI); + + if (!RegClass) { + assert((!isTargetSpecificOpcode(II.getOpcode()) || RegMO.isUse()) && + "Register class constraint is required unless either the " + "instruction is target independent or the operand is a use"); + // FIXME: Just bailing out like this here could be not enough, unless we + // expect the users of this function to do the right thing for PHIs and + // COPY: + // v1 = COPY v0 + // v2 = COPY v1 + // v1 here may end up not being constrained at all. Please notice that to + // reproduce the issue we likely need a destination pattern of a selection + // rule producing such extra copies, not just an input GMIR with them as + // every existing target using selectImpl handles copies before calling it + // and they never reach this function. + return Reg; + } return constrainRegToClass(MRI, TII, RBI, InsertPt, Reg, *RegClass); } +bool llvm::constrainSelectedInstRegOperands(MachineInstr &I, + const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + assert(!isPreISelGenericOpcode(I.getOpcode()) && + "A selected instruction is expected"); + MachineBasicBlock &MBB = *I.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + for (unsigned OpI = 0, OpE = I.getNumExplicitOperands(); OpI != OpE; ++OpI) { + MachineOperand &MO = I.getOperand(OpI); + + // There's nothing to be done on non-register operands. + if (!MO.isReg()) + continue; + + LLVM_DEBUG(dbgs() << "Converting operand: " << MO << '\n'); + assert(MO.isReg() && "Unsupported non-reg operand"); + + unsigned Reg = MO.getReg(); + // Physical registers don't need to be constrained. + if (TRI.isPhysicalRegister(Reg)) + continue; + + // Register operands with a value of 0 (e.g. predicate operands) don't need + // to be constrained. + if (Reg == 0) + continue; + + // If the operand is a vreg, we should constrain its regclass, and only + // insert COPYs if that's impossible. + // constrainOperandRegClass does that for us. + MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), + MO, OpI)); + + // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been + // done. + if (MO.isUse()) { + int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO); + if (DefIdx != -1 && !I.isRegTiedToUseOperand(DefIdx)) + I.tieOperands(DefIdx, OpI); + } + } + return true; +} + bool llvm::isTriviallyDead(const MachineInstr &MI, const MachineRegisterInfo &MRI) { // If we can move an instruction, we can remove it. Otherwise, it has @@ -101,7 +177,7 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, MI.getDebugLoc(), MI.getParent()); R << Msg; // Printing MI is expensive; only do it if expensive remarks are enabled. - if (MORE.allowExtraAnalysis(PassName)) + if (TPC.isGlobalISelAbortEnabled() || MORE.allowExtraAnalysis(PassName)) R << ": " << ore::MNV("Inst", MI); reportGISelFailure(MF, TPC, MORE, R); } @@ -145,3 +221,20 @@ llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, unsigned Reg, } return DefMI->getOpcode() == Opcode ? 
DefMI : nullptr; } + +APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) { + if (Size == 32) + return APFloat(float(Val)); + if (Size == 64) + return APFloat(Val); + if (Size != 16) + llvm_unreachable("Unsupported FPConstant size"); + bool Ignored; + APFloat APF(Val); + APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored); + return APF; +} + +void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { + AU.addPreserved<StackProtector>(); +} diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp index 3888226fa059..ca56f4e0c4f1 100644 --- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp @@ -70,7 +70,6 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -89,6 +88,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> @@ -159,13 +159,13 @@ namespace { bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const; - /// \brief Merge everything in \p Globals for which the corresponding bit + /// Merge everything in \p Globals for which the corresponding bit /// in \p GlobalSet is set. bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, const BitVector &GlobalSet, Module &M, bool isConst, unsigned AddrSpace) const; - /// \brief Check if the given variable has been identified as must keep + /// Check if the given variable has been identified as must keep /// \pre setMustKeepGlobalVariables must have been called on the Module that /// contains GV bool isMustKeepGlobalVariable(const GlobalVariable *GV) const { @@ -177,7 +177,7 @@ namespace { void setMustKeepGlobalVariables(Module &M); /// Collect every variables marked as "used" - void collectUsedGlobalVariables(Module &M); + void collectUsedGlobalVariables(Module &M, StringRef Name); /// Keep track of the GlobalVariable that must not be merged away SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables; @@ -242,7 +242,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, // code (currently, a Function) to the set of globals seen so far that are // used together in that unit (GlobalUsesByFunction). // - // When we look at the Nth global, we now that any new set is either: + // When we look at the Nth global, we know that any new set is either: // - the singleton set {N}, containing this global only, or // - the union of {N} and a previously-discovered set, containing some // combination of the previous N-1 globals. 
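Before the GlobalMerge changes continue: the GlobalISel/Utils.cpp hunk above adds llvm::getAPFloatFromSize, which turns a host double into an APFloat of the requested width, converting through IEEE half for the 16-bit case and rejecting anything else. A short usage sketch with made-up values:

#include "llvm/ADT/APFloat.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"

using namespace llvm;

// Illustrative only: build FP constants of several widths from one double.
static void apFloatFromSizeExamples() {
  APFloat Half = getAPFloatFromSize(1.5, 16);    // rounded to IEEEhalf
  APFloat Single = getAPFloatFromSize(1.5, 32);  // APFloat(float(1.5))
  APFloat Dbl = getAPFloatFromSize(1.5, 64);     // APFloat(1.5)
  (void)Half; (void)Single; (void)Dbl;
  // Any other Size value hits the llvm_unreachable in the function above.
}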
@@ -440,28 +440,44 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, assert(Globals.size() > 1); Type *Int32Ty = Type::getInt32Ty(M.getContext()); + Type *Int8Ty = Type::getInt8Ty(M.getContext()); auto &DL = M.getDataLayout(); - DEBUG(dbgs() << " Trying to merge set, starts with #" - << GlobalSet.find_first() << "\n"); + LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #" + << GlobalSet.find_first() << "\n"); + bool Changed = false; ssize_t i = GlobalSet.find_first(); while (i != -1) { ssize_t j = 0; uint64_t MergedSize = 0; std::vector<Type*> Tys; std::vector<Constant*> Inits; + std::vector<unsigned> StructIdxs; bool HasExternal = false; StringRef FirstExternalName; + unsigned MaxAlign = 1; + unsigned CurIdx = 0; for (j = i; j != -1; j = GlobalSet.find_next(j)) { Type *Ty = Globals[j]->getValueType(); + unsigned Align = DL.getPreferredAlignment(Globals[j]); + unsigned Padding = alignTo(MergedSize, Align) - MergedSize; + MergedSize += Padding; MergedSize += DL.getTypeAllocSize(Ty); if (MergedSize > MaxOffset) { break; } + if (Padding) { + Tys.push_back(ArrayType::get(Int8Ty, Padding)); + Inits.push_back(ConstantAggregateZero::get(Tys.back())); + ++CurIdx; + } Tys.push_back(Ty); Inits.push_back(Globals[j]->getInitializer()); + StructIdxs.push_back(CurIdx++); + + MaxAlign = std::max(MaxAlign, Align); if (Globals[j]->hasExternalLinkage() && !HasExternal) { HasExternal = true; @@ -469,12 +485,19 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, } } + // Exit early if there is only one global to merge. + if (Tys.size() < 2) { + i = j; + continue; + } + // If merged variables doesn't have external linkage, we needn't to expose // the symbol after merging. GlobalValue::LinkageTypes Linkage = HasExternal ? GlobalValue::ExternalLinkage : GlobalValue::InternalLinkage; - StructType *MergedTy = StructType::get(M.getContext(), Tys); + // Use a packed struct so we can control alignment. + StructType *MergedTy = StructType::get(M.getContext(), Tys, true); Constant *MergedInit = ConstantStruct::get(MergedTy, Inits); // On Darwin external linkage needs to be preserved, otherwise @@ -492,19 +515,23 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr, GlobalVariable::NotThreadLocal, AddrSpace); - const StructLayout *MergedLayout = DL.getStructLayout(MergedTy); + MergedGV->setAlignment(MaxAlign); + const StructLayout *MergedLayout = DL.getStructLayout(MergedTy); for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) { GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); std::string Name = Globals[k]->getName(); + GlobalValue::DLLStorageClassTypes DLLStorage = + Globals[k]->getDLLStorageClass(); // Copy metadata while adjusting any debug info metadata by the original // global's offset within the merged global. 
- MergedGV->copyMetadata(Globals[k], MergedLayout->getElementOffset(idx)); + MergedGV->copyMetadata(Globals[k], + MergedLayout->getElementOffset(StructIdxs[idx])); Constant *Idx[2] = { - ConstantInt::get(Int32Ty, 0), - ConstantInt::get(Int32Ty, idx), + ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, StructIdxs[idx]), }; Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx); @@ -517,20 +544,23 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, // It's not safe on Mach-O as the alias (and thus the portion of the // MergedGlobals variable) may be dead stripped at link time. if (Linkage != GlobalValue::InternalLinkage || !IsMachO) { - GlobalAlias::create(Tys[idx], AddrSpace, Linkage, Name, GEP, &M); + GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace, + Linkage, Name, GEP, &M); + GA->setDLLStorageClass(DLLStorage); } NumMerged++; } + Changed = true; i = j; } - return true; + return Changed; } -void GlobalMerge::collectUsedGlobalVariables(Module &M) { +void GlobalMerge::collectUsedGlobalVariables(Module &M, StringRef Name) { // Extract global variables from llvm.used array - const GlobalVariable *GV = M.getGlobalVariable("llvm.used"); + const GlobalVariable *GV = M.getGlobalVariable(Name); if (!GV || !GV->hasInitializer()) return; // Should be an array of 'i8*'. @@ -543,7 +573,8 @@ void GlobalMerge::collectUsedGlobalVariables(Module &M) { } void GlobalMerge::setMustKeepGlobalVariables(Module &M) { - collectUsedGlobalVariables(M); + collectUsedGlobalVariables(M, "llvm.used"); + collectUsedGlobalVariables(M, "llvm.compiler.used"); for (Function &F : M) { for (BasicBlock &BB : F) { @@ -577,8 +608,7 @@ bool GlobalMerge::doInitialization(Module &M) { for (auto &GV : M.globals()) { // Merge is safe for "normal" internal or external globals only if (GV.isDeclaration() || GV.isThreadLocal() || - GV.hasSection() || GV.hasImplicitSection() || - GV.hasDLLExportStorageClass()) + GV.hasSection() || GV.hasImplicitSection()) continue; // It's not safe to merge globals that may be preempted @@ -594,12 +624,6 @@ bool GlobalMerge::doInitialization(Module &M) { unsigned AddressSpace = PT->getAddressSpace(); - // Ignore fancy-aligned globals for now. - unsigned Alignment = DL.getPreferredAlignment(&GV); - Type *Ty = GV.getValueType(); - if (Alignment > DL.getABITypeAlignment(Ty)) - continue; - // Ignore all 'special' globals. 
if (GV.getName().startswith("llvm.") || GV.getName().startswith(".llvm.")) @@ -609,6 +633,7 @@ bool GlobalMerge::doInitialization(Module &M) { if (isMustKeepGlobalVariable(&GV)) continue; + Type *Ty = GV.getValueType(); if (DL.getTypeAllocSize(Ty) < MaxOffset) { if (TM && TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal()) diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index d8ce90e63a9d..f12d00071b24 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -252,7 +252,7 @@ namespace { BBInfo &TrueBBI, BBInfo &FalseBBI) const; void AnalyzeBlock(MachineBasicBlock &MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens); - bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond, + bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Pred, bool isTriangle = false, bool RevBranch = false, bool hasCommonTail = false); void AnalyzeBlocks(MachineFunction &MF, @@ -347,7 +347,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { BranchFolder::MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>()); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); MRI = &MF.getRegInfo(); - SchedModel.init(ST.getSchedModel(), &ST, TII); + SchedModel.init(&ST); if (!TII) return false; @@ -361,14 +361,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { getAnalysisIfAvailable<MachineModuleInfo>()); } - DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" - << MF.getName() << "\'"); + LLVM_DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" + << MF.getName() << "\'"); if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) { - DEBUG(dbgs() << " skipped\n"); + LLVM_DEBUG(dbgs() << " skipped\n"); return false; } - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "\n"); MF.RenumberBlocks(); BBAnalysis.resize(MF.getNumBlockIDs()); @@ -406,14 +406,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { case ICSimpleFalse: { bool isFalse = Kind == ICSimpleFalse; if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break; - DEBUG(dbgs() << "Ifcvt (Simple" - << (Kind == ICSimpleFalse ? " false" : "") - << "): " << printMBBReference(*BBI.BB) << " (" - << ((Kind == ICSimpleFalse) ? BBI.FalseBB->getNumber() - : BBI.TrueBB->getNumber()) - << ") "); + LLVM_DEBUG(dbgs() << "Ifcvt (Simple" + << (Kind == ICSimpleFalse ? " false" : "") + << "): " << printMBBReference(*BBI.BB) << " (" + << ((Kind == ICSimpleFalse) ? BBI.FalseBB->getNumber() + : BBI.TrueBB->getNumber()) + << ") "); RetVal = IfConvertSimple(BBI, Kind); - DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" 
: "failed!") << "\n"); if (RetVal) { if (isFalse) ++NumSimpleFalse; else ++NumSimple; @@ -430,16 +430,16 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (DisableTriangleR && !isFalse && isRev) break; if (DisableTriangleF && isFalse && !isRev) break; if (DisableTriangleFR && isFalse && isRev) break; - DEBUG(dbgs() << "Ifcvt (Triangle"); + LLVM_DEBUG(dbgs() << "Ifcvt (Triangle"); if (isFalse) - DEBUG(dbgs() << " false"); + LLVM_DEBUG(dbgs() << " false"); if (isRev) - DEBUG(dbgs() << " rev"); - DEBUG(dbgs() << "): " << printMBBReference(*BBI.BB) - << " (T:" << BBI.TrueBB->getNumber() - << ",F:" << BBI.FalseBB->getNumber() << ") "); + LLVM_DEBUG(dbgs() << " rev"); + LLVM_DEBUG(dbgs() << "): " << printMBBReference(*BBI.BB) + << " (T:" << BBI.TrueBB->getNumber() + << ",F:" << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertTriangle(BBI, Kind); - DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) { if (isFalse) { if (isRev) ++NumTriangleFRev; @@ -453,24 +453,25 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { } case ICDiamond: if (DisableDiamond) break; - DEBUG(dbgs() << "Ifcvt (Diamond): " << printMBBReference(*BBI.BB) - << " (T:" << BBI.TrueBB->getNumber() - << ",F:" << BBI.FalseBB->getNumber() << ") "); + LLVM_DEBUG(dbgs() << "Ifcvt (Diamond): " << printMBBReference(*BBI.BB) + << " (T:" << BBI.TrueBB->getNumber() + << ",F:" << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2, Token->TClobbersPred, Token->FClobbersPred); - DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) ++NumDiamonds; break; case ICForkedDiamond: if (DisableForkedDiamond) break; - DEBUG(dbgs() << "Ifcvt (Forked Diamond): " << printMBBReference(*BBI.BB) - << " (T:" << BBI.TrueBB->getNumber() - << ",F:" << BBI.FalseBB->getNumber() << ") "); + LLVM_DEBUG(dbgs() << "Ifcvt (Forked Diamond): " + << printMBBReference(*BBI.BB) + << " (T:" << BBI.TrueBB->getNumber() + << ",F:" << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2, Token->TClobbersPred, Token->FClobbersPred); - DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) ++NumForkedDiamonds; break; } @@ -948,7 +949,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI, BBI.ExtraCost2 = 0; BBI.ClobbersPred = false; for (MachineInstr &MI : make_range(Begin, End)) { - if (MI.isDebugValue()) + if (MI.isDebugInstr()) continue; // It's unsafe to duplicate convergent instructions in this context, so set @@ -1726,14 +1727,14 @@ bool IfConverter::IfConvertDiamondCommon( for (unsigned i = 0; i < NumDups1; ++DI1) { if (DI1 == MBB1.end()) break; - if (!DI1->isDebugValue()) + if (!DI1->isDebugInstr()) ++i; } while (NumDups1 != 0) { ++DI2; if (DI2 == MBB2.end()) break; - if (!DI2->isDebugValue()) + if (!DI2->isDebugInstr()) --NumDups1; } @@ -1767,7 +1768,7 @@ bool IfConverter::IfConvertDiamondCommon( assert(DI1 != MBB1.begin()); --DI1; // skip dbg_value instructions - if (!DI1->isDebugValue()) + if (!DI1->isDebugInstr()) ++i; } MBB1.erase(DI1, MBB1.end()); @@ -1782,7 +1783,7 @@ bool IfConverter::IfConvertDiamondCommon( // instructions could be found. 
while (DI2 != MBB2.begin()) { MachineBasicBlock::iterator Prev = std::prev(DI2); - if (!Prev->isBranch() && !Prev->isDebugValue()) + if (!Prev->isBranch() && !Prev->isDebugInstr()) break; DI2 = Prev; } @@ -1793,7 +1794,7 @@ bool IfConverter::IfConvertDiamondCommon( assert(DI2 != MBB2.begin()); --DI2; // skip dbg_value instructions - if (!DI2->isDebugValue()) + if (!DI2->isDebugInstr()) --NumDups2; } @@ -1809,7 +1810,7 @@ bool IfConverter::IfConvertDiamondCommon( SmallSet<unsigned, 4> ExtUses; if (TII->isProfitableToUnpredicate(MBB1, MBB2)) { for (const MachineInstr &FI : make_range(MBB2.begin(), DI2)) { - if (FI.isDebugValue()) + if (FI.isDebugInstr()) continue; SmallVector<unsigned, 4> Defs; for (const MachineOperand &MO : FI.operands()) { @@ -2002,7 +2003,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, bool AnyUnpred = false; bool MaySpec = LaterRedefs != nullptr; for (MachineInstr &I : make_range(BBI.BB->begin(), E)) { - if (I.isDebugValue() || TII->isPredicated(I)) + if (I.isDebugInstr() || TII->isPredicated(I)) continue; // It may be possible not to predicate an instruction if it's the 'true' // side of a diamond and the 'false' side may re-define the instruction's @@ -2058,7 +2059,7 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, ToBBI.ExtraCost += NumCycles-1; ToBBI.ExtraCost2 += ExtraPredCost; - if (!TII->isPredicated(I) && !MI->isDebugValue()) { + if (!TII->isPredicated(I) && !MI->isDebugInstr()) { if (!TII->PredicateInstruction(*MI, Cond)) { #ifndef NDEBUG dbgs() << "Unable to predicate " << I << "!\n"; diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp index 308b6d293d3d..0a447bc613b1 100644 --- a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -115,7 +115,7 @@ class ImplicitNullChecks : public MachineFunctionPass { /// \c canHandle should return true for all instructions in \p /// Insts. DependenceResult computeDependence(const MachineInstr *MI, - ArrayRef<MachineInstr *> Insts); + ArrayRef<MachineInstr *> Block); /// Represents one null check that can be made implicit. class NullCheck { @@ -134,7 +134,7 @@ class ImplicitNullChecks : public MachineFunctionPass { // The block branched to if the pointer is null. MachineBasicBlock *NullSucc; - // If this is non-null, then MemOperation has a dependency on on this + // If this is non-null, then MemOperation has a dependency on this // instruction; and it needs to be hoisted to execute before MemOperation. MachineInstr *OnlyDependency; @@ -198,7 +198,7 @@ class ImplicitNullChecks : public MachineFunctionPass { SuitabilityResult isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg, ArrayRef<MachineInstr *> PrevInsts); - /// Return true if \p FaultingMI can be hoisted from after the the + /// Return true if \p FaultingMI can be hoisted from after the /// instructions in \p InstsSeenSoFar to before them. Set \p Dependence to a /// non-null value if we also need to (and legally can) hoist a depedency. bool canHoistInst(MachineInstr *FaultingMI, unsigned PointerReg, @@ -496,6 +496,32 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( if (NotNullSucc->pred_size() != 1) return false; + // To prevent the invalid transformation of the following code: + // + // mov %rax, %rcx + // test %rax, %rax + // %rax = ... + // je throw_npe + // mov(%rcx), %r9 + // mov(%rax), %r10 + // + // into: + // + // mov %rax, %rcx + // %rax = .... 
+ // faulting_load_op("movl (%rax), %r10", throw_npe) + // mov(%rcx), %r9 + // + // we must ensure that there are no instructions between the 'test' and + // conditional jump that modify %rax. + const unsigned PointerReg = MBP.LHS.getReg(); + + assert(MBP.ConditionDef->getParent() == &MBB && "Should be in basic block"); + + for (auto I = MBB.rbegin(); MBP.ConditionDef != &*I; ++I) + if (I->modifiesRegister(PointerReg, TRI)) + return false; + // Starting with a code fragment like: // // test %rax, %rax @@ -550,8 +576,6 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // ptr could be some non-null invalid reference that never gets loaded from // because some_cond is always true. - const unsigned PointerReg = MBP.LHS.getReg(); - SmallVector<MachineInstr *, 8> InstsSeenSoFar; for (auto &MI : *NotNullSucc) { @@ -596,9 +620,8 @@ MachineInstr *ImplicitNullChecks::insertFaultingInstr( unsigned DefReg = NoRegister; if (NumDefs != 0) { - DefReg = MI->defs().begin()->getReg(); - assert(std::distance(MI->defs().begin(), MI->defs().end()) == 1 && - "expected exactly one def!"); + DefReg = MI->getOperand(0).getReg(); + assert(NumDefs == 1 && "expected exactly one def!"); } FaultMaps::FaultKind FK; diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index 86ce4b7a9464..007e9283d833 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -46,6 +46,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Config/llvm-config.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" @@ -335,7 +336,7 @@ void InlineSpiller::collectRegsToSpill() { if (isRegToSpill(SnipReg)) continue; RegsToSpill.push_back(SnipReg); - DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n'); + LLVM_DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n'); ++NumSnippets; } } @@ -387,8 +388,8 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, LiveInterval &OrigLI = LIS.getInterval(Original); VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx); StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0)); - DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": " - << *StackInt << '\n'); + LLVM_DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": " + << *StackInt << '\n'); // We are going to spill SrcVNI immediately after its def, so clear out // any later spills of the same value. @@ -409,7 +410,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, MRI.getRegClass(SrcReg), &TRI); --MII; // Point to store instruction. LIS.InsertMachineInstrInMaps(*MII); - DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII); + LLVM_DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII); HSpiller.addToMergeableSpills(*MII, StackSlot, Original); ++NumSpills; @@ -428,8 +429,8 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { LiveInterval *LI; std::tie(LI, VNI) = WorkList.pop_back_val(); unsigned Reg = LI->reg; - DEBUG(dbgs() << "Checking redundant spills for " - << VNI->id << '@' << VNI->def << " in " << *LI << '\n'); + LLVM_DEBUG(dbgs() << "Checking redundant spills for " << VNI->id << '@' + << VNI->def << " in " << *LI << '\n'); // Regs to spill are taken care of. 
if (isRegToSpill(Reg)) @@ -437,7 +438,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { // Add all of VNI's live range to StackInt. StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0)); - DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n'); + LLVM_DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n'); // Find all spills and copies of VNI. for (MachineRegisterInfo::use_instr_nodbg_iterator @@ -465,7 +466,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { // Erase spills. int FI; if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) { - DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << MI); + LLVM_DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << MI); // eliminateDeadDefs won't normally remove stores, so switch opcode. MI.setDesc(TII.get(TargetOpcode::KILL)); DeadDefs.push_back(&MI); @@ -527,13 +528,13 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex()); if (!ParentVNI) { - DEBUG(dbgs() << "\tadding <undef> flags: "); + LLVM_DEBUG(dbgs() << "\tadding <undef> flags: "); for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) MO.setIsUndef(); } - DEBUG(dbgs() << UseIdx << '\t' << MI); + LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI); return true; } @@ -547,7 +548,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { if (!Edit->canRematerializeAt(RM, OrigVNI, UseIdx, false)) { markValueUsed(&VirtReg, ParentVNI); - DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI); + LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI); return false; } @@ -555,7 +556,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { // same register for uses and defs. if (RI.Tied) { markValueUsed(&VirtReg, ParentVNI); - DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << MI); + LLVM_DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << MI); return false; } @@ -581,8 +582,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { NewMI->setDebugLoc(MI.getDebugLoc()); (void)DefIdx; - DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' - << *LIS.getInstructionFromIndex(DefIdx)); + LLVM_DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' + << *LIS.getInstructionFromIndex(DefIdx)); // Replace operands for (const auto &OpPair : Ops) { @@ -592,7 +593,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { MO.setIsKill(); } } - DEBUG(dbgs() << "\t " << UseIdx << '\t' << MI << '\n'); + LLVM_DEBUG(dbgs() << "\t " << UseIdx << '\t' << MI << '\n'); ++NumRemats; return true; @@ -619,6 +620,9 @@ void InlineSpiller::reMaterializeAll() { if (MI.isDebugValue()) continue; + assert(!MI.isDebugInstr() && "Did not expect to find a use in debug " + "instruction that isn't a DBG_VALUE"); + anyRemat |= reMaterializeFor(LI, MI); } } @@ -637,7 +641,7 @@ void InlineSpiller::reMaterializeAll() { MI->addRegisterDead(Reg, &TRI); if (!MI->allDefsAreDead()) continue; - DEBUG(dbgs() << "All defs dead: " << *MI); + LLVM_DEBUG(dbgs() << "All defs dead: " << *MI); DeadDefs.push_back(MI); } } @@ -646,7 +650,7 @@ void InlineSpiller::reMaterializeAll() { // deleted here. 
if (DeadDefs.empty()) return; - DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); + LLVM_DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA); // LiveRangeEdit::eliminateDeadDef is used to remove dead define instructions @@ -669,7 +673,8 @@ void InlineSpiller::reMaterializeAll() { RegsToSpill[ResultPos++] = Reg; } RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end()); - DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n"); + LLVM_DEBUG(dbgs() << RegsToSpill.size() + << " registers to spill after remat.\n"); } //===----------------------------------------------------------------------===// @@ -691,7 +696,7 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { if (!IsLoad) HSpiller.rmFromMergeableSpills(*MI, StackSlot); - DEBUG(dbgs() << "Coalescing stack access: " << *MI); + LLVM_DEBUG(dbgs() << "Coalescing stack access: " << *MI); LIS.RemoveMachineInstrFromMaps(*MI); MI->eraseFromParent(); @@ -848,8 +853,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, FoldMI->RemoveOperand(i - 1); } - DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS, - "folded")); + LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS, + "folded")); if (!WasCopy) ++NumFolded; @@ -872,8 +877,8 @@ void InlineSpiller::insertReload(unsigned NewVReg, LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI); - DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, "reload", - NewVReg)); + LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, "reload", + NewVReg)); ++NumReloads; } @@ -912,8 +917,8 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill, LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end()); - DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS, - "spill")); + LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS, + "spill")); ++NumSpills; if (IsRealSpill) HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original); @@ -921,7 +926,7 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill, /// spillAroundUses - insert spill code around each use of Reg. void InlineSpiller::spillAroundUses(unsigned Reg) { - DEBUG(dbgs() << "spillAroundUses " << printReg(Reg) << '\n'); + LLVM_DEBUG(dbgs() << "spillAroundUses " << printReg(Reg) << '\n'); LiveInterval &OldLI = LIS.getInterval(Reg); // Iterate over instructions using Reg. @@ -934,12 +939,15 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { if (MI->isDebugValue()) { // Modify DBG_VALUE now that the value is in a spill slot. MachineBasicBlock *MBB = MI->getParent(); - DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI); + LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI); buildDbgValueForSpill(*MBB, MI, *MI, StackSlot); MBB->erase(MI); continue; } + assert(!MI->isDebugInstr() && "Did not expect to find a use in debug " + "instruction that isn't a DBG_VALUE"); + // Ignore copies to/from snippets. We'll delete them. if (SnippetCopies.count(MI)) continue; @@ -965,7 +973,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { if (SibReg && isSibling(SibReg)) { // This may actually be a copy between snippets. 
if (isRegToSpill(SibReg)) { - DEBUG(dbgs() << "Found new snippet copy: " << *MI); + LLVM_DEBUG(dbgs() << "Found new snippet copy: " << *MI); SnippetCopies.insert(MI); continue; } @@ -1008,7 +1016,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { hasLiveDef = true; } } - DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n'); + LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n'); // FIXME: Use a second vreg if instruction has no tied ops. if (RI.Writes) @@ -1034,7 +1042,7 @@ void InlineSpiller::spillAll() { for (unsigned Reg : RegsToSpill) StackInt->MergeSegmentsInAsValue(LIS.getInterval(Reg), StackInt->getValNumInfo(0)); - DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n'); + LLVM_DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n'); // Spill around uses of all RegsToSpill. for (unsigned Reg : RegsToSpill) @@ -1042,7 +1050,7 @@ void InlineSpiller::spillAll() { // Hoisted spills may cause dead code. if (!DeadDefs.empty()) { - DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n"); + LLVM_DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n"); Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA); } @@ -1074,10 +1082,10 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { StackSlot = VRM.getStackSlot(Original); StackInt = nullptr; - DEBUG(dbgs() << "Inline spilling " - << TRI.getRegClassName(MRI.getRegClass(edit.getReg())) - << ':' << edit.getParent() - << "\nFrom original " << printReg(Original) << '\n'); + LLVM_DEBUG(dbgs() << "Inline spilling " + << TRI.getRegClassName(MRI.getRegClass(edit.getReg())) + << ':' << edit.getParent() << "\nFrom original " + << printReg(Original) << '\n'); assert(edit.getParent().isSpillable() && "Attempting to spill already spilled value."); assert(DeadDefs.empty() && "Previous spill didn't remove dead defs"); @@ -1261,11 +1269,11 @@ void HoistSpillHelper::getVisitOrders( "Orders have different size with WorkSet"); #ifndef NDEBUG - DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n"); + LLVM_DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n"); SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin(); for (; RIt != Orders.rend(); RIt++) - DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ","); - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ","); + LLVM_DEBUG(dbgs() << "\n"); #endif } @@ -1374,7 +1382,7 @@ void HoistSpillHelper::runHoistSpills( // Current Block is the BB containing the new hoisted spill. Add it to // SpillsToKeep. LiveReg is the source of the new spill. 
SpillsToKeep[*RIt] = LiveReg; - DEBUG({ + LLVM_DEBUG({ dbgs() << "spills in BB: "; for (const auto Rspill : SpillsInSubTree) dbgs() << Rspill->getBlock()->getNumber() << " "; @@ -1430,7 +1438,7 @@ void HoistSpillHelper::hoistAllSpills() { if (Ent.second.empty()) continue; - DEBUG({ + LLVM_DEBUG({ dbgs() << "\nFor Slot" << Slot << " and VN" << OrigVNI->id << ":\n" << "Equal spills in BB: "; for (const auto spill : EqValSpills) @@ -1445,7 +1453,7 @@ void HoistSpillHelper::hoistAllSpills() { runHoistSpills(OrigLI, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns); - DEBUG({ + LLVM_DEBUG({ dbgs() << "Finally inserted spills in BB: "; for (const auto Ispill : SpillsToIns) dbgs() << Ispill.first->getNumber() << " "; diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp index 72227cc7bba9..82f6e8d8e234 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp @@ -48,8 +48,8 @@ void InterferenceCache::reinitPhysRegEntries() { if (PhysRegEntriesCount == TRI->getNumRegs()) return; free(PhysRegEntries); PhysRegEntriesCount = TRI->getNumRegs(); - PhysRegEntries = (unsigned char*) - calloc(PhysRegEntriesCount, sizeof(unsigned char)); + PhysRegEntries = static_cast<unsigned char*>( + safe_calloc(PhysRegEntriesCount, sizeof(unsigned char))); } void InterferenceCache::init(MachineFunction *mf, diff --git a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 9c906d309639..fd2ff162630a 100644 --- a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -104,15 +104,15 @@ private: /// The maximum supported interleave factor. unsigned MaxFactor; - /// \brief Transform an interleaved load into target specific intrinsics. + /// Transform an interleaved load into target specific intrinsics. bool lowerInterleavedLoad(LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts); - /// \brief Transform an interleaved store into target specific intrinsics. + /// Transform an interleaved store into target specific intrinsics. bool lowerInterleavedStore(StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts); - /// \brief Returns true if the uses of an interleaved load by the + /// Returns true if the uses of an interleaved load by the /// extractelement instructions in \p Extracts can be replaced by uses of the /// shufflevector instructions in \p Shuffles instead. If so, the necessary /// replacements are also performed. @@ -136,7 +136,7 @@ FunctionPass *llvm::createInterleavedAccessPass() { return new InterleavedAccess(); } -/// \brief Check if the mask is a DE-interleave mask of the given factor +/// Check if the mask is a DE-interleave mask of the given factor /// \p Factor like: /// <Index, Index+Factor, ..., Index+(NumElts-1)*Factor> static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor, @@ -158,7 +158,7 @@ static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor, return false; } -/// \brief Check if the mask is a DE-interleave mask for an interleaved load. +/// Check if the mask is a DE-interleave mask for an interleaved load. /// /// E.g. DE-interleave masks (Factor = 2) could be: /// <0, 2, 4, 6> (mask of index 0 to extract even elements) @@ -176,7 +176,7 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor, return false; } -/// \brief Check if the mask can be used in an interleaved store. 
+/// Check if the mask can be used in an interleaved store. // /// It checks for a more general pattern than the RE-interleave mask. /// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...> @@ -332,7 +332,7 @@ bool InterleavedAccess::lowerInterleavedLoad( if (!tryReplaceExtracts(Extracts, Shuffles)) return false; - DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n"); + LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n"); // Try to create target specific intrinsics to replace the load and shuffles. if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) @@ -424,7 +424,7 @@ bool InterleavedAccess::lowerInterleavedStore( if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts)) return false; - DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n"); + LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n"); // Try to create target specific intrinsics to replace the store and shuffle. if (!TLI->lowerInterleavedStore(SI, SVI, Factor)) @@ -441,7 +441,7 @@ bool InterleavedAccess::runOnFunction(Function &F) { if (!TPC || !LowerInterleavedAccesses) return false; - DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n"); + LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n"); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &TM = TPC->getTM<TargetMachine>(); diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp index 12777d5ed110..eb4099964242 100644 --- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -456,6 +456,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { } case Intrinsic::dbg_declare: + case Intrinsic::dbg_label: break; // Simply strip out debugging intrinsics case Intrinsic::eh_typeid_for: diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 4c6e21ab315a..2cd389ce2c11 100644 --- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -16,7 +16,6 @@ #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/MC/MCAsmBackend.h" @@ -24,16 +23,22 @@ #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; +static cl::opt<bool> EnableTrapUnreachable("trap-unreachable", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable generating trap for unreachable")); + void LLVMTargetMachine::initAsmInfo() { MRI = TheTarget.createMCRegInfo(getTargetTriple().str()); MII = TheTarget.createMCInstrInfo(); @@ -79,6 +84,9 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, this->RM = RM; this->CMModel = CM; this->OptLevel = OL; + + if (EnableTrapUnreachable) + this->Options.TrapUnreachable = true; } TargetTransformInfo @@ -113,8 +121,10 @@ 
addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, } bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, - raw_pwrite_stream &Out, CodeGenFileType FileType, - MCContext &Context) { + raw_pwrite_stream &Out, + raw_pwrite_stream *DwoOut, + CodeGenFileType FileType, + MCContext &Context) { if (Options.MCOptions.MCSaveTempLabels) Context.setAllowTemporaryLabels(false); @@ -131,17 +141,17 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, getTargetTriple(), MAI.getAssemblerDialect(), MAI, MII, MRI); // Create a code emitter if asked to show the encoding. - MCCodeEmitter *MCE = nullptr; + std::unique_ptr<MCCodeEmitter> MCE; if (Options.MCOptions.ShowMCEncoding) - MCE = getTarget().createMCCodeEmitter(MII, MRI, Context); + MCE.reset(getTarget().createMCCodeEmitter(MII, MRI, Context)); - MCAsmBackend *MAB = - getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions); + std::unique_ptr<MCAsmBackend> MAB( + getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions)); auto FOut = llvm::make_unique<formatted_raw_ostream>(Out); MCStreamer *S = getTarget().createAsmStreamer( Context, std::move(FOut), Options.MCOptions.AsmVerbose, - Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB, - Options.MCOptions.ShowMCInst); + Options.MCOptions.MCUseDwarfDirectory, InstPrinter, std::move(MCE), + std::move(MAB), Options.MCOptions.ShowMCInst); AsmStreamer.reset(S); break; } @@ -159,7 +169,9 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, Triple T(getTargetTriple().str()); AsmStreamer.reset(getTarget().createMCObjectStreamer( - T, Context, std::unique_ptr<MCAsmBackend>(MAB), Out, + T, Context, std::unique_ptr<MCAsmBackend>(MAB), + DwoOut ? MAB->createDwoObjectWriter(Out, *DwoOut) + : MAB->createObjectWriter(Out), std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll, Options.MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ true)); @@ -184,6 +196,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out, + raw_pwrite_stream *DwoOut, CodeGenFileType FileType, bool DisableVerify, MachineModuleInfo *MMI) { @@ -194,7 +207,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, if (!Context) return true; - if (WillCompleteCodeGenPipeline && addAsmPrinter(PM, Out, FileType, *Context)) + if (WillCompleteCodeGenPipeline && + addAsmPrinter(PM, Out, DwoOut, FileType, *Context)) return true; PM.add(createFreeMachineFunctionPass()); @@ -234,7 +248,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, const Triple &T = getTargetTriple(); std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer( - T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), Out, + T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), MAB->createObjectWriter(Out), std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll, Options.MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ true)); diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp index 8ffd51a550fc..5dbce841cfd5 100644 --- a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/Config/llvm-config.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" 
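The DwoOut parameter threaded through addAsmPrinter/addPassesToEmitFile above lets a caller request a separate DWO object writer for split-DWARF output. A hedged caller sketch follows; it assumes this snapshot's headers and the TargetMachine::CGFT_ObjectFile enum spelling, and an already-constructed TargetMachine and Module (target registration and error handling omitted).

// Sketch of driving the updated addPassesToEmitFile() with an optional .dwo
// stream. Passing nullptr keeps the old single-object behaviour; a non-null
// stream makes the MCAsmBackend create a DWO object writer.
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

static bool emitObject(llvm::TargetMachine &TM, llvm::Module &M,
                       llvm::raw_pwrite_stream &ObjOS,
                       llvm::raw_pwrite_stream *DwoOS /* may be null */) {
  llvm::legacy::PassManager PM;
  if (TM.addPassesToEmitFile(PM, ObjOS, DwoOS,
                             llvm::TargetMachine::CGFT_ObjectFile,
                             /*DisableVerify=*/true, /*MMI=*/nullptr))
    return false; // The target cannot emit a file of this type.
  PM.run(M);
  return true;
}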
using namespace llvm; @@ -139,3 +140,14 @@ void LatencyPriorityQueue::remove(SUnit *SU) { std::swap(*I, Queue.back()); Queue.pop_back(); } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const { + dbgs() << "Latency Priority Queue\n"; + dbgs() << " Number of Queue Entries: " << Queue.size() << "\n"; + for (auto const &SU : Queue) { + dbgs() << " "; + SU->dump(DAG); + } +} +#endif diff --git a/contrib/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp index 996d40ca6e1e..5b52cc66a297 100644 --- a/contrib/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp @@ -57,23 +57,23 @@ MachineBlockFrequencyInfo & LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const { auto *MBFI = getAnalysisIfAvailable<MachineBlockFrequencyInfo>(); if (MBFI) { - DEBUG(dbgs() << "MachineBlockFrequencyInfo is available\n"); + LLVM_DEBUG(dbgs() << "MachineBlockFrequencyInfo is available\n"); return *MBFI; } auto &MBPI = getAnalysis<MachineBranchProbabilityInfo>(); auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); - DEBUG(dbgs() << "Building MachineBlockFrequencyInfo on the fly\n"); - DEBUG(if (MLI) dbgs() << "LoopInfo is available\n"); + LLVM_DEBUG(dbgs() << "Building MachineBlockFrequencyInfo on the fly\n"); + LLVM_DEBUG(if (MLI) dbgs() << "LoopInfo is available\n"); if (!MLI) { - DEBUG(dbgs() << "Building LoopInfo on the fly\n"); + LLVM_DEBUG(dbgs() << "Building LoopInfo on the fly\n"); // First create a dominator tree. - DEBUG(if (MDT) dbgs() << "DominatorTree is available\n"); + LLVM_DEBUG(if (MDT) dbgs() << "DominatorTree is available\n"); if (!MDT) { - DEBUG(dbgs() << "Building DominatorTree on the fly\n"); + LLVM_DEBUG(dbgs() << "Building DominatorTree on the fly\n"); OwnedMDT = make_unique<MachineDominatorTree>(); OwnedMDT->getBase().recalculate(*MF); MDT = OwnedMDT.get(); diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp index 8c54751ee833..d06821bdfcce 100644 --- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp index 19ec281079cb..fea83e92de8f 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -40,6 +40,8 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" @@ -64,7 +66,7 @@ using namespace llvm; STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); -// \brief If @MI is a DBG_VALUE with debug value described by a defined +// If @MI is a DBG_VALUE with debug value described by a defined // register, returns the number of this register. In the other case, returns 0. 
static unsigned isDbgValueDescribedByReg(const MachineInstr &MI) { assert(MI.isDebugValue() && "expected a DBG_VALUE"); @@ -81,6 +83,7 @@ private: const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; const TargetFrameLowering *TFI; + BitVector CalleeSavedRegs; LexicalScopes LS; /// Keeps track of lexical scopes associated with a user value's source @@ -178,11 +181,11 @@ private: using VarLocMap = UniqueVector<VarLoc>; using VarLocSet = SparseBitVector<>; using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>; - struct SpillDebugPair { - MachineInstr *SpillInst; + struct TransferDebugPair { + MachineInstr *TransferInst; MachineInstr *DebugInst; }; - using SpillMap = SmallVector<SpillDebugPair, 4>; + using TransferMap = SmallVector<TransferDebugPair, 4>; /// This holds the working set of currently open ranges. For fast /// access, this is done both as a set of VarLocIDs, and a map of @@ -235,18 +238,23 @@ private: bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF, unsigned &Reg); int extractSpillBaseRegAndOffset(const MachineInstr &MI, unsigned &Reg); + void insertTransferDebugPair(MachineInstr &MI, OpenRangesSet &OpenRanges, + TransferMap &Transfers, VarLocMap &VarLocIDs, + unsigned OldVarID, unsigned NewReg = 0); void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs); void transferSpillInst(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocMap &VarLocIDs, SpillMap &Spills); + VarLocMap &VarLocIDs, TransferMap &Transfers); + void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, TransferMap &Transfers); void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges, const VarLocMap &VarLocIDs); bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs); - bool transfer(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, SpillMap &Spills, - bool transferSpills); + bool process(MachineInstr &MI, OpenRangesSet &OpenRanges, + VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, + TransferMap &Transfers, bool transferChanges); bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs, const VarLocMap &VarLocIDs, @@ -369,6 +377,54 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI, } } +/// Create new TransferDebugPair and insert it in \p Transfers. The VarLoc +/// with \p OldVarID should be deleted form \p OpenRanges and replaced with +/// new VarLoc. If \p NewReg is different than default zero value then the +/// new location will be register location created by the copy like instruction, +/// otherwise it is variable's location on the stack. +void LiveDebugValues::insertTransferDebugPair( + MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers, + VarLocMap &VarLocIDs, unsigned OldVarID, unsigned NewReg) { + const MachineInstr *DMI = &VarLocIDs[OldVarID].MI; + MachineFunction *MF = MI.getParent()->getParent(); + MachineInstr *NewDMI; + if (NewReg) { + // Create a DBG_VALUE instruction to describe the Var in its new + // register location. 
+ NewDMI = BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), + DMI->isIndirectDebugValue(), NewReg, + DMI->getDebugVariable(), DMI->getDebugExpression()); + if (DMI->isIndirectDebugValue()) + NewDMI->getOperand(1).setImm(DMI->getOperand(1).getImm()); + LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register copy: "; + NewDMI->print(dbgs(), false, false, false, TII)); + } else { + // Create a DBG_VALUE instruction to describe the Var in its spilled + // location. + unsigned SpillBase; + int SpillOffset = extractSpillBaseRegAndOffset(MI, SpillBase); + auto *SpillExpr = DIExpression::prepend(DMI->getDebugExpression(), + DIExpression::NoDeref, SpillOffset); + NewDMI = BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase, + DMI->getDebugVariable(), SpillExpr); + LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: "; + NewDMI->print(dbgs(), false, false, false, TII)); + } + + // The newly created DBG_VALUE instruction NewDMI must be inserted after + // MI. Keep track of the pairing. + TransferDebugPair MIP = {&MI, NewDMI}; + Transfers.push_back(MIP); + + // End all previous ranges of Var. + OpenRanges.erase(VarLocIDs[OldVarID].Var); + + // Add the VarLoc to OpenRanges. + VarLoc VL(*NewDMI, LS); + unsigned LocID = VarLocIDs.insert(VL); + OpenRanges.insert(LocID, VL.Var); +} + /// A definition of a register may mark the end of a range. void LiveDebugValues::transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges, @@ -426,28 +482,51 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI, FrameInfo.isSpillSlotObjectIndex(FI))) return false; - // In a spill instruction generated by the InlineSpiller the spilled register - // has its kill flag set. Return false if we don't find such a register. - Reg = 0; + auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) { + if (!MO.isReg() || !MO.isUse()) { + Reg = 0; + return false; + } + Reg = MO.getReg(); + return MO.isKill(); + }; + for (const MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.isUse() && MO.isKill()) { - Reg = MO.getReg(); - break; + // In a spill instruction generated by the InlineSpiller the spilled + // register has its kill flag set. + if (isKilledReg(MO, Reg)) + return true; + if (Reg != 0) { + // Check whether next instruction kills the spilled register. + // FIXME: Current solution does not cover search for killed register in + // bundles and instructions further down the chain. + auto NextI = std::next(MI.getIterator()); + // Skip next instruction that points to basic block end iterator. + if (MI.getParent()->end() == NextI) + continue; + unsigned RegNext; + for (const MachineOperand &MONext : NextI->operands()) { + // Return true if we came across the register from the + // previous spill instruction that is killed in NextI. + if (isKilledReg(MONext, RegNext) && RegNext == Reg) + return true; + } } } - return Reg != 0; + // Return false if we didn't find spilled register. + return false; } /// A spilled register may indicate that we have to end the current range of /// a variable and create a new one for the spill location. -/// We don't want to insert any instructions in transfer(), so we just create -/// the DBG_VALUE witout inserting it and keep track of it in @Spills. +/// We don't want to insert any instructions in process(), so we just create +/// the DBG_VALUE without inserting it and keep track of it in \p Transfers. /// It will be inserted into the BB when we're done iterating over the /// instructions. 
void LiveDebugValues::transferSpillInst(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, - SpillMap &Spills) { + TransferMap &Transfers) { unsigned Reg; MachineFunction *MF = MI.getMF(); if (!isSpillInstruction(MI, MF, Reg)) @@ -456,35 +535,49 @@ void LiveDebugValues::transferSpillInst(MachineInstr &MI, // Check if the register is the location of a debug value. for (unsigned ID : OpenRanges.getVarLocs()) { if (VarLocIDs[ID].isDescribedByReg() == Reg) { - DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '(' - << VarLocIDs[ID].Var.getVar()->getName() << ")\n"); - - // Create a DBG_VALUE instruction to describe the Var in its spilled - // location, but don't insert it yet to avoid invalidating the - // iterator in our caller. - unsigned SpillBase; - int SpillOffset = extractSpillBaseRegAndOffset(MI, SpillBase); - const MachineInstr *DMI = &VarLocIDs[ID].MI; - auto *SpillExpr = DIExpression::prepend( - DMI->getDebugExpression(), DIExpression::NoDeref, SpillOffset); - MachineInstr *SpDMI = - BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase, - DMI->getDebugVariable(), SpillExpr); - DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: "; - SpDMI->print(dbgs(), false, TII)); - - // The newly created DBG_VALUE instruction SpDMI must be inserted after - // MI. Keep track of the pairing. - SpillDebugPair MIP = {&MI, SpDMI}; - Spills.push_back(MIP); - - // End all previous ranges of Var. - OpenRanges.erase(VarLocIDs[ID].Var); - - // Add the VarLoc to OpenRanges. - VarLoc VL(*SpDMI, LS); - unsigned SpillLocID = VarLocIDs.insert(VL); - OpenRanges.insert(SpillLocID, VL.Var); + LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '(' + << VarLocIDs[ID].Var.getVar()->getName() << ")\n"); + insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID); + return; + } + } +} + +/// If \p MI is a register copy instruction, that copies a previously tracked +/// value from one register to another register that is callee saved, we +/// create new DBG_VALUE instruction described with copy destination register. +void LiveDebugValues::transferRegisterCopy(MachineInstr &MI, + OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, + TransferMap &Transfers) { + const MachineOperand *SrcRegOp, *DestRegOp; + + if (!TII->isCopyInstr(MI, SrcRegOp, DestRegOp) || !SrcRegOp->isKill() || + !DestRegOp->isDef()) + return; + + auto isCalleSavedReg = [&](unsigned Reg) { + for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI) + if (CalleeSavedRegs.test(*RAI)) + return true; + return false; + }; + + unsigned SrcReg = SrcRegOp->getReg(); + unsigned DestReg = DestRegOp->getReg(); + + // We want to recognize instructions where destination register is callee + // saved register. If register that could be clobbered by the call is + // included, there would be a great chance that it is going to be clobbered + // soon. It is more likely that previous register location, which is callee + // saved, is going to stay unclobbered longer, even if it is killed. 
+ if (!isCalleSavedReg(DestReg)) + return; + + for (unsigned ID : OpenRanges.getVarLocs()) { + if (VarLocIDs[ID].isDescribedByReg() == SrcReg) { + insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID, + DestReg); return; } } @@ -497,16 +590,18 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI, const VarLocMap &VarLocIDs) { bool Changed = false; const MachineBasicBlock *CurMBB = MI.getParent(); - if (!(MI.isTerminator() || (&MI == &CurMBB->instr_back()))) + if (!(MI.isTerminator() || (&MI == &CurMBB->back()))) return false; if (OpenRanges.empty()) return false; - DEBUG(for (unsigned ID : OpenRanges.getVarLocs()) { - // Copy OpenRanges to OutLocs, if not already present. - dbgs() << "Add to OutLocs: "; VarLocIDs[ID].dump(); - }); + LLVM_DEBUG(for (unsigned ID + : OpenRanges.getVarLocs()) { + // Copy OpenRanges to OutLocs, if not already present. + dbgs() << "Add to OutLocs: "; + VarLocIDs[ID].dump(); + }); VarLocSet &VLS = OutLocs[CurMBB]; Changed = VLS |= OpenRanges.getVarLocs(); OpenRanges.clear(); @@ -514,14 +609,16 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI, } /// This routine creates OpenRanges and OutLocs. -bool LiveDebugValues::transfer(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, - SpillMap &Spills, bool transferSpills) { +bool LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges, + VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, + TransferMap &Transfers, bool transferChanges) { bool Changed = false; transferDebugValue(MI, OpenRanges, VarLocIDs); transferRegisterDef(MI, OpenRanges, VarLocIDs); - if (transferSpills) - transferSpillInst(MI, OpenRanges, VarLocIDs, Spills); + if (transferChanges) { + transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers); + transferSpillInst(MI, OpenRanges, VarLocIDs, Transfers); + } Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs); return Changed; } @@ -532,7 +629,7 @@ bool LiveDebugValues::transfer(MachineInstr &MI, OpenRangesSet &OpenRanges, bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs, const VarLocMap &VarLocIDs, SmallPtrSet<const MachineBasicBlock *, 16> &Visited) { - DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n"); + LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n"); bool Changed = false; VarLocSet InLocsT; // Temporary incoming locations. @@ -583,7 +680,7 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, for (auto ID : Diff) { // This VarLoc is not found in InLocs i.e. it is not yet inserted. So, a // new range is started for the var from the mbb's beginning by inserting - // a new DBG_VALUE. transfer() will end this range however appropriate. + // a new DBG_VALUE. process() will end this range however appropriate. const VarLoc &DiffIt = VarLocIDs[ID]; const MachineInstr *DMI = &DiffIt.MI; MachineInstr *MI = @@ -592,7 +689,7 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, DMI->getDebugVariable(), DMI->getDebugExpression()); if (DMI->isIndirectDebugValue()) MI->getOperand(1).setImm(DMI->getOperand(1).getImm()); - DEBUG(dbgs() << "Inserted: "; MI->dump();); + LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump();); ILS.set(ID); ++NumInserted; Changed = true; @@ -603,7 +700,7 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, /// Calculate the liveness information for the given machine function and /// extend ranges across basic blocks. 
bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { - DEBUG(dbgs() << "\nDebug Range Extension\n"); + LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n"); bool Changed = false; bool OLChanged = false; @@ -613,7 +710,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { OpenRangesSet OpenRanges; // Ranges that are open until end of bb. VarLocInMBB OutLocs; // Ranges that exist beyond bb. VarLocInMBB InLocs; // Ranges that are incoming after joining. - SpillMap Spills; // DBG_VALUEs associated with spills. + TransferMap Transfers; // DBG_VALUEs associated with spills. DenseMap<unsigned int, MachineBasicBlock *> OrderToBB; DenseMap<MachineBasicBlock *, unsigned int> BBToOrder; @@ -624,6 +721,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { std::greater<unsigned int>> Pending; + enum : bool { dontTransferChanges = false, transferChanges = true }; + // Initialize every mbb with OutLocs. // We are not looking at any spill instructions during the initial pass // over the BBs. The LiveDebugVariables pass has already created DBG_VALUE @@ -631,11 +730,11 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { // within the BB in which the spill occurs. for (auto &MBB : MF) for (auto &MI : MBB) - transfer(MI, OpenRanges, OutLocs, VarLocIDs, Spills, - /*transferSpills=*/false); + process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, + dontTransferChanges); - DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "OutLocs after initialization", - dbgs())); + LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, + "OutLocs after initialization", dbgs())); ReversePostOrderTraversal<MachineFunction *> RPOT(&MF); unsigned int RPONumber = 0; @@ -646,7 +745,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { ++RPONumber; } // This is a standard "union of predecessor outs" dataflow problem. - // To solve it, we perform join() and transfer() using the two worklist method + // To solve it, we perform join() and process() using the two worklist method // until the ranges converge. // Ranges have converged when both worklists are empty. SmallPtrSet<const MachineBasicBlock *, 16> Visited; @@ -655,7 +754,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { // thing twice. We could avoid this with a custom priority queue, but this // is probably not worth it. SmallPtrSet<MachineBasicBlock *, 16> OnPending; - DEBUG(dbgs() << "Processing Worklist\n"); + LLVM_DEBUG(dbgs() << "Processing Worklist\n"); while (!Worklist.empty()) { MachineBasicBlock *MBB = OrderToBB[Worklist.top()]; Worklist.pop(); @@ -668,19 +767,19 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { // examine spill instructions to see whether they spill registers that // correspond to user variables. for (auto &MI : *MBB) - OLChanged |= transfer(MI, OpenRanges, OutLocs, VarLocIDs, Spills, - /*transferSpills=*/true); + OLChanged |= process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, + transferChanges); // Add any DBG_VALUE instructions necessitated by spills. 
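The "union of predecessor outs" iteration described above boils down to a small generic skeleton: visit blocks in reverse post order, re-queue successors whose inputs changed, and swap the two worklists until both are empty. The sketch below is plain standalone C++ with an illustrative CFG type and bitset transfer function; it shows the two-worklist scheme, not the LiveDebugValues code itself.

#include <bitset>
#include <functional>
#include <queue>
#include <set>
#include <vector>

struct Block {
  std::vector<int> Preds, Succs;
};

// Out[b] is updated in place until a fixed point is reached.
static void solveUnionOfPredOuts(const std::vector<Block> &CFG,
                                 const std::vector<std::bitset<64>> &Gen,
                                 std::vector<std::bitset<64>> &Out) {
  using MinHeap =
      std::priority_queue<int, std::vector<int>, std::greater<int>>;
  MinHeap Worklist, Pending;
  // Assume blocks are already numbered in reverse post order (0, 1, 2, ...),
  // so popping the smallest number visits them in RPO.
  for (int B = 0, E = static_cast<int>(CFG.size()); B != E; ++B)
    Worklist.push(B);

  while (!Worklist.empty()) {
    std::set<int> OnPending; // Don't queue the same block twice per round.
    while (!Worklist.empty()) {
      int B = Worklist.top();
      Worklist.pop();
      // join(): union of predecessor outs.
      std::bitset<64> In;
      for (int P : CFG[B].Preds)
        In |= Out[P];
      // process(): a trivial transfer function for this sketch.
      std::bitset<64> NewOut = In | Gen[B];
      if (NewOut != Out[B]) {
        Out[B] = NewOut;
        for (int S : CFG[B].Succs)
          if (OnPending.insert(S).second)
            Pending.push(S);
      }
    }
    Worklist.swap(Pending);
    // Converged when both worklists are empty.
  }
}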
- for (auto &SP : Spills) - MBB->insertAfter(MachineBasicBlock::iterator(*SP.SpillInst), - SP.DebugInst); - Spills.clear(); + for (auto &TR : Transfers) + MBB->insertAfter(MachineBasicBlock::iterator(*TR.TransferInst), + TR.DebugInst); + Transfers.clear(); - DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, - "OutLocs after propagating", dbgs())); - DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, - "InLocs after propagating", dbgs())); + LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, + "OutLocs after propagating", dbgs())); + LLVM_DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, + "InLocs after propagating", dbgs())); if (OLChanged) { OLChanged = false; @@ -697,8 +796,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { assert(Pending.empty() && "Pending should be empty"); } - DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "Final OutLocs", dbgs())); - DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, "Final InLocs", dbgs())); + LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "Final OutLocs", dbgs())); + LLVM_DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, "Final InLocs", dbgs())); return Changed; } @@ -715,6 +814,8 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); TII = MF.getSubtarget().getInstrInfo(); TFI = MF.getSubtarget().getFrameLowering(); + TFI->determineCalleeSaves(MF, CalleeSavedRegs, + make_unique<RegScavenger>().get()); LS.initialize(MF); bool Changed = ExtendRanges(MF); diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp index 4ffcffcea693..3ff03ec4a7ee 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -44,6 +44,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" @@ -223,7 +224,12 @@ public: return L1; } - /// getLocationNo - Return the location number that matches Loc. + /// Return the location number that matches Loc. + /// + /// For undef values we always return location number UndefLocNo without + /// inserting anything in locations. Since locations is a vector and the + /// location number is the position in the vector and UndefLocNo is ~0, + /// we would need a very big vector to put the value at the right position. unsigned getLocationNo(const MachineOperand &LocMO) { if (LocMO.isReg()) { if (LocMO.getReg() == 0) @@ -301,7 +307,7 @@ public: /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is /// live. Returns true if any changes were made. - bool splitRegister(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, + bool splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS); /// rewriteLocations - Rewrite virtual register locations according to the @@ -510,7 +516,7 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { if (MI.getNumOperands() != 4 || !(MI.getOperand(1).isReg() || MI.getOperand(1).isImm()) || !MI.getOperand(2).isMetadata()) { - DEBUG(dbgs() << "Can't handle " << MI); + LLVM_DEBUG(dbgs() << "Can't handle " << MI); return false; } @@ -529,8 +535,8 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { // The DBG_VALUE is described by a virtual register that does not have a // live interval. Discard the DBG_VALUE. 
Discard = true; - DEBUG(dbgs() << "Discarding debug info (no LIS interval): " - << Idx << " " << MI); + LLVM_DEBUG(dbgs() << "Discarding debug info (no LIS interval): " << Idx + << " " << MI); } else { // The DBG_VALUE is only valid if either Reg is live out from Idx, or Reg // is defined dead at Idx (where Idx is the slot index for the instruction @@ -541,8 +547,8 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { // We have found a DBG_VALUE with the value in a virtual register that // is not live. Discard the DBG_VALUE. Discard = true; - DEBUG(dbgs() << "Discarding debug info (reg not live): " - << Idx << " " << MI); + LLVM_DEBUG(dbgs() << "Discarding debug info (reg not live): " << Idx + << " " << MI); } } } @@ -687,7 +693,8 @@ void UserValue::addDefsFromCopies( if (CopyValues.empty()) return; - DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI << '\n'); + LLVM_DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI + << '\n'); // Try to add defs of the copied values for each kill point. for (unsigned i = 0, e = Kills.size(); i != e; ++i) { @@ -701,8 +708,8 @@ void UserValue::addDefsFromCopies( LocMap::iterator I = locInts.find(Idx); if (I.valid() && I.start() <= Idx) continue; - DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #" - << DstVNI->id << " in " << *DstLI << '\n'); + LLVM_DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #" + << DstVNI->id << " in " << *DstLI << '\n'); MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def); assert(CopyMI && CopyMI->isCopy() && "Bad copy value"); unsigned LocNo = getLocationNo(CopyMI->getOperand(0)); @@ -759,13 +766,6 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, // function). } - // Erase all the undefs. - for (LocMap::iterator I = locInts.begin(); I.valid();) - if (I.value().isUndef()) - I.erase(); - else - ++I; - // The computed intervals may extend beyond the range of the debug // location's lexical scope. In this case, splitting of an interval // can result in an interval outside of the scope being created, @@ -850,12 +850,12 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { MF = &mf; LIS = &pass.getAnalysis<LiveIntervals>(); TRI = mf.getSubtarget().getRegisterInfo(); - DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " - << mf.getName() << " **********\n"); + LLVM_DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " + << mf.getName() << " **********\n"); bool Changed = collectDebugValues(mf); computeIntervals(); - DEBUG(print(dbgs())); + LLVM_DEBUG(print(dbgs())); ModifiedMF = Changed; return Changed; } @@ -901,7 +901,7 @@ LiveDebugVariables::~LiveDebugVariables() { bool UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, LiveIntervals& LIS) { - DEBUG({ + LLVM_DEBUG({ dbgs() << "Splitting Loc" << OldLocNo << '\t'; print(dbgs(), nullptr); }); @@ -984,17 +984,22 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, while (LocMapI.valid()) { DbgValueLocation v = LocMapI.value(); if (v.locNo() == OldLocNo) { - DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';' - << LocMapI.stop() << ")\n"); + LLVM_DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';' + << LocMapI.stop() << ")\n"); LocMapI.erase(); } else { - if (v.locNo() > OldLocNo) + // Undef values always have location number UndefLocNo, so don't change + // locNo in that case. See getLocationNo(). 
+ if (!v.isUndef() && v.locNo() > OldLocNo) LocMapI.setValueUnchecked(v.changeLocNo(v.locNo() - 1)); ++LocMapI; } } - DEBUG({dbgs() << "Split result: \t"; print(dbgs(), nullptr);}); + LLVM_DEBUG({ + dbgs() << "Split result: \t"; + print(dbgs(), nullptr); + }); return DidChange; } @@ -1094,6 +1099,10 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI, // physical register. for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) { DbgValueLocation Loc = I.value(); + // Undef values don't exist in locations (and thus not in LocNoMap either) + // so skip over them. See getLocationNo(). + if (Loc.isUndef()) + continue; unsigned NewLocNo = LocNoMap[Loc.locNo()]; I.setValueUnchecked(Loc.changeLocNo(NewLocNo)); I.setStart(I.start()); @@ -1136,7 +1145,7 @@ findNextInsertLocation(MachineBasicBlock *MBB, unsigned Reg = LocMO.getReg(); // Find the next instruction in the MBB that define the register Reg. - while (I != MBB->end()) { + while (I != MBB->end() && !I->isTerminator()) { if (!LIS.isNotInMIMap(*I) && SlotIndex::isEarlierEqualInstr(StopIdx, LIS.getInstructionIndex(*I))) break; @@ -1158,7 +1167,15 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, // Only search within the current MBB. StopIdx = (MBBEndIdx < StopIdx) ? MBBEndIdx : StopIdx; MachineBasicBlock::iterator I = findInsertLocation(MBB, StartIdx, LIS); - MachineOperand &MO = locations[Loc.locNo()]; + // Undef values don't exist in locations so create new "noreg" register MOs + // for them. See getLocationNo(). + MachineOperand MO = !Loc.isUndef() ? + locations[Loc.locNo()] : + MachineOperand::CreateReg(/* Reg */ 0, /* isDef */ false, /* isImp */ false, + /* isKill */ false, /* isDead */ false, + /* isUndef */ false, /* isEarlyClobber */ false, + /* SubReg */ 0, /* isDebug */ true); + ++NumInsertedDebugValues; assert(cast<DILocalVariable>(Variable) @@ -1179,14 +1196,8 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, assert((!Spilled || MO.isFI()) && "a spilled location must be a frame index"); do { - MachineInstrBuilder MIB = - BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) - .add(MO); - if (IsIndirect) - MIB.addImm(0U); - else - MIB.addReg(0U, RegState::Debug); - MIB.addMetadata(Variable).addMetadata(Expr); + BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, MO, Variable, Expr); // Continue and insert DBG_VALUES after every redefinition of register // associated with the debug value within the range @@ -1212,11 +1223,11 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, if (trimmedDefs.count(Start)) Start = Start.getPrevIndex(); - DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo()); + LLVM_DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo()); MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator(); SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB); - DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd); + LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd); insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI); // This interval may span multiple basic blocks. // Insert a DBG_VALUE into each one. 
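The UndefLocNo handling above relies on location numbers being indices into the locations vector, while undef uses the sentinel ~0u with no vector entry. A tiny standalone illustration of why renumbering must skip the sentinel (names are hypothetical, not LLVM's):

#include <cstdint>

static const std::uint32_t UndefLocNo = ~0u;

// Renumber a location reference after the location numbered OldLocNo has
// been erased from the locations vector.
static std::uint32_t renumberAfterErase(std::uint32_t LocNo,
                                        std::uint32_t OldLocNo) {
  if (LocNo == UndefLocNo)
    return LocNo; // Sentinel, not a vector index: leave it untouched.
  return LocNo > OldLocNo ? LocNo - 1 : LocNo;
}
// e.g. renumberAfterErase(5, 2) == 4, while renumberAfterErase(UndefLocNo, 2)
// stays UndefLocNo instead of decaying into a bogus near-maximal index.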
@@ -1226,10 +1237,10 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, if (++MBB == MFEnd) break; MBBEnd = LIS.getMBBEndIdx(&*MBB); - DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd); + LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd); insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI); } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); if (MBB == MFEnd) break; @@ -1238,13 +1249,13 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, } void LDVImpl::emitDebugValues(VirtRegMap *VRM) { - DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n"); + LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n"); if (!MF) return; const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); BitVector SpilledLocations; for (unsigned i = 0, e = userValues.size(); i != e; ++i) { - DEBUG(userValues[i]->print(dbgs(), TRI)); + LLVM_DEBUG(userValues[i]->print(dbgs(), TRI)); userValues[i]->rewriteLocations(*VRM, *TRI, SpilledLocations); userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpilledLocations); } diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index 302c75133e35..83dd982587c6 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" @@ -991,6 +992,7 @@ void LiveInterval::print(raw_ostream &OS) const { // Print subranges for (const SubRange &SR : subranges()) OS << SR; + OS << " weight:" << weight; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp index 3e742a6c2f21..36428e0335f9 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp @@ -187,7 +187,7 @@ void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc, clear(); Size = NSize; LIUs = static_cast<LiveIntervalUnion*>( - malloc(sizeof(LiveIntervalUnion)*NSize)); + safe_malloc(sizeof(LiveIntervalUnion)*NSize)); for (unsigned i = 0; i != Size; ++i) new(LIUs + i) LiveIntervalUnion(Alloc); } diff --git a/contrib/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm/lib/CodeGen/LiveIntervals.cpp index 79fdba7e062a..471775f8706b 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervals.cpp @@ -37,6 +37,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Config/llvm-config.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -147,7 +148,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) getRegUnit(i); } - DEBUG(dump()); + LLVM_DEBUG(dump()); return true; } @@ -310,7 +311,7 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) { /// entering the entry block or a landing pad. 
void LiveIntervals::computeLiveInRegUnits() { RegUnitRanges.resize(TRI->getNumRegUnits()); - DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n"); + LLVM_DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n"); // Keep track of the live range sets allocated. SmallVector<unsigned, 8> NewRanges; @@ -323,7 +324,7 @@ void LiveIntervals::computeLiveInRegUnits() { // Create phi-defs at Begin for all live-in registers. SlotIndex Begin = Indexes->getMBBStartIdx(&MBB); - DEBUG(dbgs() << Begin << "\t" << printMBBReference(MBB)); + LLVM_DEBUG(dbgs() << Begin << "\t" << printMBBReference(MBB)); for (const auto &LI : MBB.liveins()) { for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) { unsigned Unit = *Units; @@ -335,12 +336,12 @@ void LiveIntervals::computeLiveInRegUnits() { } VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator()); (void)VNI; - DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << '#' << VNI->id); + LLVM_DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << '#' << VNI->id); } } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); } - DEBUG(dbgs() << "Created " << NewRanges.size() << " new intervals.\n"); + LLVM_DEBUG(dbgs() << "Created " << NewRanges.size() << " new intervals.\n"); // Compute the 'normal' part of the ranges. for (unsigned Unit : NewRanges) @@ -357,26 +358,40 @@ static void createSegmentsForValues(LiveRange &LR, } } -using ShrinkToUsesWorkList = SmallVector<std::pair<SlotIndex, VNInfo*>, 16>; - -static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes, - ShrinkToUsesWorkList &WorkList, - const LiveRange &OldRange) { +void LiveIntervals::extendSegmentsToUses(LiveRange &Segments, + ShrinkToUsesWorkList &WorkList, + unsigned Reg, LaneBitmask LaneMask) { // Keep track of the PHIs that are in use. SmallPtrSet<VNInfo*, 8> UsedPHIs; // Blocks that have already been added to WorkList as live-out. SmallPtrSet<const MachineBasicBlock*, 16> LiveOut; + auto getSubRange = [](const LiveInterval &I, LaneBitmask M) + -> const LiveRange& { + if (M.none()) + return I; + for (const LiveInterval::SubRange &SR : I.subranges()) { + if ((SR.LaneMask & M).any()) { + assert(SR.LaneMask == M && "Expecting lane masks to match exactly"); + return SR; + } + } + llvm_unreachable("Subrange for mask not found"); + }; + + const LiveInterval &LI = getInterval(Reg); + const LiveRange &OldRange = getSubRange(LI, LaneMask); + // Extend intervals to reach all uses in WorkList. while (!WorkList.empty()) { SlotIndex Idx = WorkList.back().first; VNInfo *VNI = WorkList.back().second; WorkList.pop_back(); - const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(Idx.getPrevSlot()); - SlotIndex BlockStart = Indexes.getMBBStartIdx(MBB); + const MachineBasicBlock *MBB = Indexes->getMBBFromIndex(Idx.getPrevSlot()); + SlotIndex BlockStart = Indexes->getMBBStartIdx(MBB); // Extend the live range for VNI to be live at Idx. - if (VNInfo *ExtVNI = LR.extendInBlock(BlockStart, Idx)) { + if (VNInfo *ExtVNI = Segments.extendInBlock(BlockStart, Idx)) { assert(ExtVNI == VNI && "Unexpected existing value number"); (void)ExtVNI; // Is this a PHIDef we haven't seen before? @@ -387,7 +402,7 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes, for (const MachineBasicBlock *Pred : MBB->predecessors()) { if (!LiveOut.insert(Pred).second) continue; - SlotIndex Stop = Indexes.getMBBEndIdx(Pred); + SlotIndex Stop = Indexes->getMBBEndIdx(Pred); // A predecessor is not required to have a live-out value for a PHI. 
if (VNInfo *PVNI = OldRange.getVNInfoBefore(Stop)) WorkList.push_back(std::make_pair(Stop, PVNI)); @@ -396,24 +411,37 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes, } // VNI is live-in to MBB. - DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); - LR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI)); + LLVM_DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); + Segments.addSegment(LiveRange::Segment(BlockStart, Idx, VNI)); // Make sure VNI is live-out from the predecessors. for (const MachineBasicBlock *Pred : MBB->predecessors()) { if (!LiveOut.insert(Pred).second) continue; - SlotIndex Stop = Indexes.getMBBEndIdx(Pred); - assert(OldRange.getVNInfoBefore(Stop) == VNI && - "Wrong value out of predecessor"); - WorkList.push_back(std::make_pair(Stop, VNI)); + SlotIndex Stop = Indexes->getMBBEndIdx(Pred); + if (VNInfo *OldVNI = OldRange.getVNInfoBefore(Stop)) { + assert(OldVNI == VNI && "Wrong value out of predecessor"); + (void)OldVNI; + WorkList.push_back(std::make_pair(Stop, VNI)); + } else { +#ifndef NDEBUG + // There was no old VNI. Verify that Stop is jointly dominated + // by <undef>s for this live range. + assert(LaneMask.any() && + "Missing value out of predecessor for main range"); + SmallVector<SlotIndex,8> Undefs; + LI.computeSubRangeUndefs(Undefs, LaneMask, *MRI, *Indexes); + assert(LiveRangeCalc::isJointlyDominated(Pred, Undefs, *Indexes) && + "Missing value out of predecessor for subrange"); +#endif + } } } } bool LiveIntervals::shrinkToUses(LiveInterval *li, SmallVectorImpl<MachineInstr*> *dead) { - DEBUG(dbgs() << "Shrink: " << *li << '\n'); + LLVM_DEBUG(dbgs() << "Shrink: " << *li << '\n'); assert(TargetRegisterInfo::isVirtualRegister(li->reg) && "Can only shrink virtual registers"); @@ -442,9 +470,10 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // This shouldn't happen: readsVirtualRegister returns true, but there is // no live value. It is likely caused by a target getting <undef> flags // wrong. - DEBUG(dbgs() << Idx << '\t' << UseMI - << "Warning: Instr claims to read non-existent value in " - << *li << '\n'); + LLVM_DEBUG( + dbgs() << Idx << '\t' << UseMI + << "Warning: Instr claims to read non-existent value in " + << *li << '\n'); continue; } // Special case: An early-clobber tied operand reads and writes the @@ -458,14 +487,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // Create new live ranges with only minimal live segments per def. LiveRange NewLR; createSegmentsForValues(NewLR, make_range(li->vni_begin(), li->vni_end())); - extendSegmentsToUses(NewLR, *Indexes, WorkList, *li); + extendSegmentsToUses(NewLR, WorkList, Reg, LaneBitmask::getNone()); // Move the trimmed segments back. li->segments.swap(NewLR.segments); // Handle dead values. bool CanSeparate = computeDeadValues(*li, dead); - DEBUG(dbgs() << "Shrunk: " << *li << '\n'); + LLVM_DEBUG(dbgs() << "Shrunk: " << *li << '\n'); return CanSeparate; } @@ -495,7 +524,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, // This is a dead PHI. Remove it. VNI->markUnused(); LI.removeSegment(I); - DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n"); + LLVM_DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n"); MayHaveSplitComponents = true; } else { // This is a dead def. Make sure the instruction knows. 
@@ -503,7 +532,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, assert(MI && "No instruction defining live value"); MI->addRegisterDead(LI.reg, TRI); if (dead && MI->allDefsAreDead()) { - DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI); + LLVM_DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI); dead->push_back(MI); } } @@ -512,7 +541,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, } void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) { - DEBUG(dbgs() << "Shrink: " << SR << '\n'); + LLVM_DEBUG(dbgs() << "Shrink: " << SR << '\n'); assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Can only shrink virtual registers"); // Find all the values used, including PHI kills. @@ -556,7 +585,7 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) { // Create a new live ranges with only minimal live segments per def. LiveRange NewLR; createSegmentsForValues(NewLR, make_range(SR.vni_begin(), SR.vni_end())); - extendSegmentsToUses(NewLR, *Indexes, WorkList, SR); + extendSegmentsToUses(NewLR, WorkList, Reg, SR.LaneMask); // Move the trimmed ranges back. SR.segments.swap(NewLR.segments); @@ -571,13 +600,14 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) { continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. - DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); + LLVM_DEBUG(dbgs() << "Dead PHI at " << VNI->def + << " may separate interval\n"); VNI->markUnused(); SR.removeSegment(*Segment); } } - DEBUG(dbgs() << "Shrunk: " << SR << '\n'); + LLVM_DEBUG(dbgs() << "Shrunk: " << SR << '\n'); } void LiveIntervals::extendToIndices(LiveRange &LR, @@ -785,7 +815,7 @@ MachineBasicBlock* LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const { // A local live range must be fully contained inside the block, meaning it is // defined and killed at instructions, not at block boundaries. It is not - // live in or or out of any block. + // live in or out of any block. // // It is technically possible to have a PHI-defined live range identical to a // single block, but we are going to return false in that case. @@ -942,7 +972,8 @@ public: /// Update all live ranges touched by MI, assuming a move from OldIdx to /// NewIdx. void updateAllRanges(MachineInstr *MI) { - DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI); + LLVM_DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " + << *MI); bool hasRegMask = false; for (MachineOperand &MO : MI->operands()) { if (MO.isRegMask()) @@ -992,7 +1023,7 @@ private: void updateRange(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) { if (!Updated.insert(&LR).second) return; - DEBUG({ + LLVM_DEBUG({ dbgs() << " "; if (TargetRegisterInfo::isVirtualRegister(Reg)) { dbgs() << printReg(Reg); @@ -1007,7 +1038,7 @@ private: handleMoveDown(LR); else handleMoveUp(LR, Reg, LaneMask); - DEBUG(dbgs() << " -->\t" << LR << '\n'); + LLVM_DEBUG(dbgs() << " -->\t" << LR << '\n'); LR.verify(); } @@ -1291,6 +1322,36 @@ private: if (OldIdxIn != E && SlotIndex::isEarlierInstr(NewIdx, OldIdxIn->end)) OldIdxIn->end = NewIdx.getRegSlot(); } + } else if (OldIdxIn != E + && SlotIndex::isEarlierInstr(NewIdxOut->start, NewIdx) + && SlotIndex::isEarlierInstr(NewIdx, NewIdxOut->end)) { + // OldIdxVNI is a dead def that has been moved into the middle of + // another value in LR. That can happen when LR is a whole register, + // but the dead def is a write to a subreg that is dead at NewIdx. 
+ // The dead def may have been moved across other values + // in LR, so move OldIdxOut up to NewIdxOut. Slide [NewIdxOut;OldIdxOut) + // down one position. + // |- X0/NewIdxOut -| ... |- Xn-1 -| |- Xn/OldIdxOut -| |- next - | + // => |- X0/NewIdxOut -| |- X0 -| ... |- Xn-1 -| |- next -| + std::copy_backward(NewIdxOut, OldIdxOut, std::next(OldIdxOut)); + // Modify the segment at NewIdxOut and the following segment to meet at + // the point of the dead def, with the following segment getting + // OldIdxVNI as its value number. + *NewIdxOut = LiveRange::Segment( + NewIdxOut->start, NewIdxDef.getRegSlot(), NewIdxOut->valno); + *(NewIdxOut + 1) = LiveRange::Segment( + NewIdxDef.getRegSlot(), (NewIdxOut + 1)->end, OldIdxVNI); + OldIdxVNI->def = NewIdxDef; + // Modify subsequent segments to be defined by the moved def OldIdxVNI. + for (auto Idx = NewIdxOut + 2; Idx <= OldIdxOut; ++Idx) + Idx->valno = OldIdxVNI; + // Aggressively remove all dead flags from the former dead definition. + // Kill/dead flags shouldn't be used while live intervals exist; they + // will be reinserted by VirtRegRewriter. + if (MachineInstr *KillMI = LIS.getInstructionFromIndex(NewIdx)) + for (MIBundleOperands MO(*KillMI); MO.isValid(); ++MO) + if (MO->isReg() && !MO->isUse()) + MO->setIsDead(false); } else { // OldIdxVNI is a dead def. It may have been moved across other values // in LR, so move OldIdxOut up to NewIdxOut. Slide [NewIdxOut;OldIdxOut) @@ -1360,7 +1421,7 @@ private: MachineBasicBlock::iterator Begin = MBB->begin(); while (MII != Begin) { - if ((--MII)->isDebugValue()) + if ((--MII)->isDebugInstr()) continue; SlotIndex Idx = Indexes->getInstructionIndex(*MII); @@ -1422,7 +1483,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, for (MachineBasicBlock::iterator I = End; I != Begin;) { --I; MachineInstr &MI = *I; - if (MI.isDebugValue()) + if (MI.isDebugInstr()) continue; SlotIndex instrIdx = getInstructionIndex(MI); @@ -1519,7 +1580,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, for (MachineBasicBlock::iterator I = End; I != Begin;) { --I; MachineInstr &MI = *I; - if (MI.isDebugValue()) + if (MI.isDebugInstr()) continue; for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(), MOE = MI.operands_end(); @@ -1580,7 +1641,7 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI, unsigned NumComp = ConEQ.Classify(LI); if (NumComp <= 1) return; - DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n'); + LLVM_DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n'); unsigned Reg = LI.reg; const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); for (unsigned I = 1; I < NumComp; ++I) { diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp index 277212cf7dac..86c6c8e29f9a 100644 --- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -18,12 +18,13 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -/// \brief Remove all registers from the set that get clobbered by the register +/// Remove all registers from the set that get clobbered by the register /// mask. /// The clobbers set will be the list of live registers clobbered /// by the regmask. 
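The segment slide performed above with std::copy_backward can be seen in isolation: the range [NewIdxOut, OldIdxOut) moves one slot to the right, freeing the element at NewIdxOut to be rewritten. A self-contained demonstration with plain ints standing in for LiveRange::Segment:

#include <algorithm>
#include <cassert>
#include <iterator>
#include <vector>

int main() {
  //                      NewIdxOut           OldIdxOut
  //                          v                   v
  std::vector<int> Segs = {10, 20, 30, 40, 50};
  auto NewIdxOut = Segs.begin() + 1; // points at 20
  auto OldIdxOut = Segs.begin() + 4; // points at 50
  // Slide [NewIdxOut, OldIdxOut) down one position: {10, 20, 20, 30, 40}.
  std::copy_backward(NewIdxOut, OldIdxOut, std::next(OldIdxOut));
  assert((Segs == std::vector<int>{10, 20, 20, 30, 40}));
  // *NewIdxOut and *(NewIdxOut + 1) can now be rewritten in place, just as
  // the code above rewrites the segments around the moved dead def.
  return 0;
}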
@@ -44,7 +45,7 @@ void LivePhysRegs::removeRegsInMask(const MachineOperand &MO, void LivePhysRegs::removeDefs(const MachineInstr &MI) { for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (O->isReg()) { - if (!O->isDef()) + if (!O->isDef() || O->isDebug()) continue; unsigned Reg = O->getReg(); if (!TargetRegisterInfo::isPhysicalRegister(Reg)) @@ -58,7 +59,7 @@ void LivePhysRegs::removeDefs(const MachineInstr &MI) { /// Add uses to the set. void LivePhysRegs::addUses(const MachineInstr &MI) { for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { - if (!O->isReg() || !O->readsReg()) + if (!O->isReg() || !O->readsReg() || O->isDebug()) continue; unsigned Reg = O->getReg(); if (!TargetRegisterInfo::isPhysicalRegister(Reg)) @@ -85,7 +86,7 @@ void LivePhysRegs::stepForward(const MachineInstr &MI, SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers) { // Remove killed registers from the set. for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { - if (O->isReg()) { + if (O->isReg() && !O->isDebug()) { unsigned Reg = O->getReg(); if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; @@ -105,9 +106,13 @@ void LivePhysRegs::stepForward(const MachineInstr &MI, // Add defs to the set. for (auto Reg : Clobbers) { - // Skip dead defs. They shouldn't be added to the set. + // Skip dead defs and registers clobbered by regmasks. They shouldn't + // be added to the set. if (Reg.second->isReg() && Reg.second->isDead()) continue; + if (Reg.second->isRegMask() && + MachineOperand::clobbersPhysReg(Reg.second->getRegMask(), Reg.first)) + continue; addReg(Reg.first); } } diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp index 66c23b7b69ce..04324943dfad 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -584,3 +584,24 @@ void LiveRangeCalc::updateSSA() { } } while (Changed); } + +bool LiveRangeCalc::isJointlyDominated(const MachineBasicBlock *MBB, + ArrayRef<SlotIndex> Defs, + const SlotIndexes &Indexes) { + const MachineFunction &MF = *MBB->getParent(); + BitVector DefBlocks(MF.getNumBlockIDs()); + for (SlotIndex I : Defs) + DefBlocks.set(Indexes.getMBBFromIndex(I)->getNumber()); + + SetVector<unsigned> PredQueue; + PredQueue.insert(MBB->getNumber()); + for (unsigned i = 0; i != PredQueue.size(); ++i) { + unsigned BN = PredQueue[i]; + if (DefBlocks[BN]) + return true; + const MachineBasicBlock *B = MF.getBlockNumbered(BN); + for (const MachineBasicBlock *P : B->predecessors()) + PredQueue.insert(P->getNumber()); + } + return false; +} diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h index c4914f23f56d..9f226b154a67 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h @@ -147,7 +147,7 @@ class LiveRangeCalc { /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, - SlotIndex Kill, unsigned PhysReg, + SlotIndex Use, unsigned PhysReg, ArrayRef<SlotIndex> Undefs); /// updateSSA - Compute the values that will be live in to all requested @@ -282,6 +282,15 @@ public: /// Every predecessor of a live-in block must have been given a value with /// setLiveOutValue, the value may be null for live-trough blocks. void calculateValues(); + + /// A diagnostic function to check if the end of the block @p MBB is + /// jointly dominated by the blocks corresponding to the slot indices + /// in @p Defs. 
This function is mainly for use in self-verification + /// checks. + LLVM_ATTRIBUTE_UNUSED + static bool isJointlyDominated(const MachineBasicBlock *MBB, + ArrayRef<SlotIndex> Defs, + const SlotIndexes &Indexes); }; } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp index 86cfbd87f5b1..8dfe8b68c3af 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -31,21 +31,24 @@ STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE"); void LiveRangeEdit::Delegate::anchor() { } -LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) { +LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg, + bool createSubRanges) { unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); - if (VRM) { + if (VRM) VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); - } + LiveInterval &LI = LIS.createEmptyInterval(VReg); if (Parent && !Parent->isSpillable()) LI.markNotSpillable(); - // Create empty subranges if the OldReg's interval has them. Do not create - // the main range here---it will be constructed later after the subranges - // have been finalized. - LiveInterval &OldLI = LIS.getInterval(OldReg); - VNInfo::Allocator &Alloc = LIS.getVNInfoAllocator(); - for (LiveInterval::SubRange &S : OldLI.subranges()) - LI.createSubRange(Alloc, S.LaneMask); + if (createSubRanges) { + // Create empty subranges if the OldReg's interval has them. Do not create + // the main range here---it will be constructed later after the subranges + // have been finalized. + LiveInterval &OldLI = LIS.getInterval(OldReg); + VNInfo::Allocator &Alloc = LIS.getVNInfoAllocator(); + for (LiveInterval::SubRange &S : OldLI.subranges()) + LI.createSubRange(Alloc, S.LaneMask); + } return LI; } @@ -217,8 +220,8 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, if (!DefMI->isSafeToMove(nullptr, SawStore)) return false; - DEBUG(dbgs() << "Try to fold single def: " << *DefMI - << " into single use: " << *UseMI); + LLVM_DEBUG(dbgs() << "Try to fold single def: " << *DefMI + << " into single use: " << *UseMI); SmallVector<unsigned, 8> Ops; if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second) @@ -227,7 +230,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, MachineInstr *FoldMI = TII.foldMemoryOperand(*UseMI, Ops, *DefMI, &LIS); if (!FoldMI) return false; - DEBUG(dbgs() << " folded: " << *FoldMI); + LLVM_DEBUG(dbgs() << " folded: " << *FoldMI); LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI); UseMI->eraseFromParent(); DefMI->addRegisterDead(LI->reg, nullptr); @@ -264,18 +267,18 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, } // Never delete inline asm. if (MI->isInlineAsm()) { - DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); + LLVM_DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); return; } // Use the same criteria as DeadMachineInstructionElim. bool SawStore = false; if (!MI->isSafeToMove(nullptr, SawStore)) { - DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); + LLVM_DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); return; } - DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); + LLVM_DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); // Collect virtual registers to be erased after MI is gone. 
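The LiveRangeCalc::isJointlyDominated helper added earlier is an instance of an insert-once worklist search over predecessors. A standalone sketch of that pattern, with a vector plus a visited flag standing in for llvm::SetVector and block numbers indexing both containers (names illustrative):

#include <cstddef>
#include <vector>

// Walk backwards over predecessors, visiting each block at most once, and
// report whether a "def" block is reachable from StartBlock.
static bool reachesDefBackwards(const std::vector<std::vector<int>> &Preds,
                                const std::vector<bool> &IsDefBlock,
                                int StartBlock) {
  std::vector<int> Queue = {StartBlock};
  std::vector<bool> Seen(Preds.size(), false);
  Seen[StartBlock] = true;
  // Like SetVector, the queue only grows; i sweeps over it exactly once.
  for (std::size_t i = 0; i != Queue.size(); ++i) {
    int B = Queue[i];
    if (IsDefBlock[B])
      return true;
    for (int P : Preds[B])
      if (!Seen[P]) {
        Seen[P] = true;
        Queue.push_back(P);
      }
  }
  return false;
}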
SmallVector<unsigned, 8> RegsToErase; @@ -349,7 +352,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, continue; MI->RemoveOperand(i-1); } - DEBUG(dbgs() << "Converted physregs to:\t" << *MI); + LLVM_DEBUG(dbgs() << "Converted physregs to:\t" << *MI); } else { // If the dest of MI is an original reg and MI is reMaterializable, // don't delete the inst. Replace the dest with a new reg, and keep @@ -357,12 +360,11 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, // LiveRangeEdit::DeadRemats and will be deleted after all the // allocations of the func are done. if (isOrigDef && DeadRemats && TII.isTriviallyReMaterializable(*MI, AA)) { - LiveInterval &NewLI = createEmptyIntervalFrom(Dest); - NewLI.removeEmptySubRanges(); + LiveInterval &NewLI = createEmptyIntervalFrom(Dest, false); VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI)); pop_back(); - markDeadRemat(MI); + DeadRemats->insert(MI); const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); MI->substituteRegister(Dest, NewLI.reg, 0, TRI); MI->getOperand(0).setIsDead(true); @@ -463,7 +465,7 @@ LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, for (unsigned I = 0, Size = size(); I < Size; ++I) { LiveInterval &LI = LIS.getInterval(get(I)); if (MRI.recomputeRegClass(LI.reg)) - DEBUG({ + LLVM_DEBUG({ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); dbgs() << "Inflated " << printReg(LI.reg) << " to " << TRI->getRegClassName(MRI.getRegClass(LI.reg)) << '\n'; diff --git a/contrib/llvm/lib/CodeGen/LiveRangeShrink.cpp b/contrib/llvm/lib/CodeGen/LiveRangeShrink.cpp index 02e1f3b01ade..f75d513c89f5 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeShrink.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeShrink.cpp @@ -111,7 +111,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); - DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); + LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); InstOrderMap IOM; // Map from register to instruction order (value of IOM) where the @@ -130,7 +130,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) { MachineInstr &MI = *Next; ++Next; - if (MI.isPHI() || MI.isDebugValue()) + if (MI.isPHI() || MI.isDebugInstr()) continue; if (MI.mayStore()) SawStore = true; @@ -218,7 +218,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) { MachineBasicBlock::iterator I = std::next(Insert->getIterator()); // Skip all the PHI and debug instructions. 
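// Note on the isDebugValue() -> isDebugInstr() rewrites in this and the
// surrounding files: they widen the "skip debug" condition from DBG_VALUE
// only to every debug pseudo-instruction. In the tree this import tracks,
// the helper is essentially
//
//   bool isDebugInstr() const { return isDebugValue() || isDebugLabel(); }
//
// so the newer DBG_LABEL instructions are ignored by these passes in the
// same way DBG_VALUEs already were. (The definition shown here is a
// paraphrase for illustration, not a line from this commit.)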
- while (I != MBB.end() && (I->isPHI() || I->isDebugValue())) + while (I != MBB.end() && (I->isPHI() || I->isDebugInstr())) I = std::next(I); if (I == MI.getIterator()) continue; diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp index bd435968296d..e72977b02675 100644 --- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -102,37 +102,37 @@ static bool foreachUnit(const TargetRegisterInfo *TRI, } void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { - DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg, TRI) - << " to " << printReg(PhysReg, TRI) << ':'); + LLVM_DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg, TRI) << " to " + << printReg(PhysReg, TRI) << ':'); assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); VRM->assignVirt2Phys(VirtReg.reg, PhysReg); - foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, - const LiveRange &Range) { - DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << ' ' << Range); - Matrix[Unit].unify(VirtReg, Range); - return false; - }); + foreachUnit( + TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) { + LLVM_DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << ' ' << Range); + Matrix[Unit].unify(VirtReg, Range); + return false; + }); ++NumAssigned; - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); } void LiveRegMatrix::unassign(LiveInterval &VirtReg) { unsigned PhysReg = VRM->getPhys(VirtReg.reg); - DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI) - << " from " << printReg(PhysReg, TRI) << ':'); + LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI) << " from " + << printReg(PhysReg, TRI) << ':'); VRM->clearVirt(VirtReg.reg); - foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, - const LiveRange &Range) { - DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI)); - Matrix[Unit].extract(VirtReg, Range); - return false; - }); + foreachUnit(TRI, VirtReg, PhysReg, + [&](unsigned Unit, const LiveRange &Range) { + LLVM_DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI)); + Matrix[Unit].extract(VirtReg, Range); + return false; + }); ++NumUnassigned; - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); } bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const { @@ -205,3 +205,19 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) { return IK_Free; } + +bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End, + unsigned PhysReg) { + // Construct artificial live range containing only one segment [Start, End). + VNInfo valno(0, Start); + LiveRange::Segment Seg(Start, End, &valno); + LiveRange LR; + LR.addSegment(Seg); + + // Check for interference with that segment + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + if (query(LR, *Units).checkInterference()) + return true; + } + return false; +} diff --git a/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp b/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp index 9f28db6287ba..c22681385492 100644 --- a/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp @@ -46,7 +46,7 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) { // Remove defined registers and regmask kills from the set. 
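// Illustrative sketch, not from the LLVM sources: the new
// LiveRegMatrix::checkInterference(Start, End, PhysReg) overload above
// answers whether PhysReg is completely free over the half-open slot
// range [Start, End) by querying an artificial one-segment live range
// against every register unit of PhysReg. A hypothetical caller probing
// for a usable register over such a gap could look like this
// (AllocationOrder and ScratchReg are stand-ins):
unsigned ScratchReg = 0;
for (unsigned PhysReg : AllocationOrder) {
  if (!Matrix->checkInterference(Start, End, PhysReg)) {
    ScratchReg = PhysReg; // No live range overlaps [Start, End).
    break;
  }
}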
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (O->isReg()) { - if (!O->isDef()) + if (!O->isDef() || O->isDebug()) continue; unsigned Reg = O->getReg(); if (!TargetRegisterInfo::isPhysicalRegister(Reg)) @@ -58,7 +58,7 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) { // Add uses to the set. for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { - if (!O->isReg() || !O->readsReg()) + if (!O->isReg() || !O->readsReg() || O->isDebug()) continue; unsigned Reg = O->getReg(); if (!TargetRegisterInfo::isPhysicalRegister(Reg)) diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp index 032dd66ae1d2..0b92eab83806 100644 --- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Config/llvm-config.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -498,7 +499,7 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI, void LiveVariables::runOnInstr(MachineInstr &MI, SmallVectorImpl<unsigned> &Defs) { - assert(!MI.isDebugValue()); + assert(!MI.isDebugInstr()); // Process all of the operands of the instruction... unsigned NumOperandsToProcess = MI.getNumOperands(); @@ -575,7 +576,7 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) { DistanceMap.clear(); unsigned Dist = 0; for (MachineInstr &MI : *MBB) { - if (MI.isDebugValue()) + if (MI.isDebugInstr()) continue; DistanceMap.insert(std::make_pair(&MI, Dist++)); diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index c0da37ede849..f90ce0c8cd2a 100644 --- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -25,7 +25,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -99,7 +98,6 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired<StackProtector>(); MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -109,12 +107,8 @@ namespace { char LocalStackSlotPass::ID = 0; char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID; - -INITIALIZE_PASS_BEGIN(LocalStackSlotPass, DEBUG_TYPE, - "Local Stack Slot Allocation", false, false) -INITIALIZE_PASS_DEPENDENCY(StackProtector) -INITIALIZE_PASS_END(LocalStackSlotPass, DEBUG_TYPE, - "Local Stack Slot Allocation", false, false) +INITIALIZE_PASS(LocalStackSlotPass, DEBUG_TYPE, + "Local Stack Slot Allocation", false, false) bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -164,8 +158,8 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI, Offset = (Offset + Align - 1) / Align * Align; int64_t LocalOffset = StackGrowsDown ? 
-Offset : Offset; - DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset " - << LocalOffset << "\n"); + LLVM_DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset " + << LocalOffset << "\n"); // Keep the offset available for base register allocation LocalOffsets[FrameIdx] = LocalOffset; // And tell MFI about it for PEI to use later @@ -202,7 +196,6 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; int64_t Offset = 0; unsigned MaxAlign = 0; - StackProtector *SP = &getAnalysis<StackProtector>(); // Make sure that the stack protector comes before the local variables on the // stack. @@ -222,16 +215,16 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { if (MFI.getStackProtectorIndex() == (int)i) continue; - switch (SP->getSSPLayout(MFI.getObjectAllocation(i))) { - case StackProtector::SSPLK_None: + switch (MFI.getObjectSSPLayout(i)) { + case MachineFrameInfo::SSPLK_None: continue; - case StackProtector::SSPLK_SmallArray: + case MachineFrameInfo::SSPLK_SmallArray: SmallArrayObjs.insert(i); continue; - case StackProtector::SSPLK_AddrOf: + case MachineFrameInfo::SSPLK_AddrOf: AddrOfObjs.insert(i); continue; - case StackProtector::SSPLK_LargeArray: + case MachineFrameInfo::SSPLK_LargeArray: LargeArrayObjs.insert(i); continue; } @@ -304,7 +297,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { for (MachineInstr &MI : BB) { // Debug value, stackmap and patchpoint instructions can't be out of // range, so they don't need any updates. - if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STATEPOINT || + if (MI.isDebugInstr() || MI.getOpcode() == TargetOpcode::STATEPOINT || MI.getOpcode() == TargetOpcode::STACKMAP || MI.getOpcode() == TargetOpcode::PATCHPOINT) continue; @@ -335,7 +328,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // Sort the frame references by local offset. // Use frame index as a tie-breaker in case MI's have the same offset. - std::sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end()); + llvm::sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end()); MachineBasicBlock *Entry = &Fn.front(); @@ -351,7 +344,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { assert(MFI.isObjectPreAllocated(FrameIdx) && "Only pre-allocated locals expected!"); - DEBUG(dbgs() << "Considering: " << MI); + LLVM_DEBUG(dbgs() << "Considering: " << MI); unsigned idx = 0; for (unsigned f = MI.getNumOperands(); idx != f; ++idx) { @@ -367,7 +360,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { int64_t Offset = 0; int64_t FrameSizeAdjust = StackGrowsDown ? MFI.getLocalFrameSize() : 0; - DEBUG(dbgs() << " Replacing FI in: " << MI); + LLVM_DEBUG(dbgs() << " Replacing FI in: " << MI); // If we have a suitable base register available, use it; otherwise // create a new one. Note that any offset encoded in the @@ -377,7 +370,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { if (UsedBaseReg && lookupCandidateBaseReg(BaseReg, BaseOffset, FrameSizeAdjust, LocalOffset, MI, TRI)) { - DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n"); + LLVM_DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n"); // We found a register to reuse. 
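// Worked example for the rounding in AdjustStackOffset() above, not from
// the LLVM sources: (Offset + Align - 1) / Align * Align rounds the
// running offset up to the next multiple of the object's alignment. With
// Offset = 13 and Align = 8 it yields (13 + 7) / 8 * 8 = 16, and on a
// downward-growing stack the recorded LocalOffset is then -16.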
Offset = FrameSizeAdjust + LocalOffset - BaseOffset; } else { @@ -405,8 +398,9 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF); BaseReg = Fn.getRegInfo().createVirtualRegister(RC); - DEBUG(dbgs() << " Materializing base register " << BaseReg << - " at frame local offset " << LocalOffset + InstrOffset << "\n"); + LLVM_DEBUG(dbgs() << " Materializing base register " << BaseReg + << " at frame local offset " + << LocalOffset + InstrOffset << "\n"); // Tell the target to insert the instruction to initialize // the base register. @@ -427,7 +421,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // Modify the instruction to use the new base register rather // than the frame index operand. TRI->resolveFrameIndex(MI, BaseReg, Offset); - DEBUG(dbgs() << "Resolved: " << MI); + LLVM_DEBUG(dbgs() << "Resolved: " << MI); ++NumReplacements; } diff --git a/contrib/llvm/lib/CodeGen/LoopTraversal.cpp b/contrib/llvm/lib/CodeGen/LoopTraversal.cpp new file mode 100644 index 000000000000..a02d10e09d7d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LoopTraversal.cpp @@ -0,0 +1,77 @@ +//===- LoopTraversal.cpp - Optimal basic block traversal order --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LoopTraversal.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/MachineFunction.h" + +using namespace llvm; + +bool LoopTraversal::isBlockDone(MachineBasicBlock *MBB) { + unsigned MBBNumber = MBB->getNumber(); + assert(MBBNumber < MBBInfos.size() && "Unexpected basic block number."); + return MBBInfos[MBBNumber].PrimaryCompleted && + MBBInfos[MBBNumber].IncomingCompleted == + MBBInfos[MBBNumber].PrimaryIncoming && + MBBInfos[MBBNumber].IncomingProcessed == MBB->pred_size(); +} + +LoopTraversal::TraversalOrder LoopTraversal::traverse(MachineFunction &MF) { + // Initialize the MMBInfos + MBBInfos.assign(MF.getNumBlockIDs(), MBBInfo()); + + MachineBasicBlock *Entry = &*MF.begin(); + ReversePostOrderTraversal<MachineBasicBlock *> RPOT(Entry); + SmallVector<MachineBasicBlock *, 4> Workqueue; + SmallVector<TraversedMBBInfo, 4> MBBTraversalOrder; + for (MachineBasicBlock *MBB : RPOT) { + // N.B: IncomingProcessed and IncomingCompleted were already updated while + // processing this block's predecessors. 
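// Illustrative walk-through, not from the LLVM sources: for a CFG
// Entry -> A, A -> B, B -> A, A -> Exit (a simple loop around A and B),
// the reverse post-order pass above reaches A while its back-edge
// predecessor B is still unprocessed, so isBlockDone(A) is false and A
// is first emitted with Done == false. Once B has been processed, all of
// A's predecessors have been seen, isBlockDone(A) flips to true, A is
// pushed back onto the workqueue, and it is emitted a second time with
// Done == true (B is revisited the same way). The intent is that a
// client can process a block provisionally on the first visit and redo
// it with complete incoming information once it comes back marked done.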
+ unsigned MBBNumber = MBB->getNumber(); + assert(MBBNumber < MBBInfos.size() && "Unexpected basic block number."); + MBBInfos[MBBNumber].PrimaryCompleted = true; + MBBInfos[MBBNumber].PrimaryIncoming = MBBInfos[MBBNumber].IncomingProcessed; + bool Primary = true; + Workqueue.push_back(MBB); + while (!Workqueue.empty()) { + MachineBasicBlock *ActiveMBB = &*Workqueue.back(); + Workqueue.pop_back(); + bool Done = isBlockDone(ActiveMBB); + MBBTraversalOrder.push_back(TraversedMBBInfo(ActiveMBB, Primary, Done)); + for (MachineBasicBlock *Succ : ActiveMBB->successors()) { + unsigned SuccNumber = Succ->getNumber(); + assert(SuccNumber < MBBInfos.size() && + "Unexpected basic block number."); + if (!isBlockDone(Succ)) { + if (Primary) + MBBInfos[SuccNumber].IncomingProcessed++; + if (Done) + MBBInfos[SuccNumber].IncomingCompleted++; + if (isBlockDone(Succ)) + Workqueue.push_back(Succ); + } + } + Primary = false; + } + } + + // We need to go through again and finalize any blocks that are not done yet. + // This is possible if blocks have dead predecessors, so we didn't visit them + // above. + for (MachineBasicBlock *MBB : RPOT) { + if (!isBlockDone(MBB)) + MBBTraversalOrder.push_back(TraversedMBBInfo(MBB, false, true)); + // Don't update successors here. We'll get to them anyway through this + // loop. + } + + MBBInfos.clear(); + + return MBBTraversalOrder; +} diff --git a/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp index 0cf578b50563..36c1d358a9bd 100644 --- a/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp +++ b/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp @@ -68,7 +68,7 @@ bool LowerEmuTLS::runOnModule(Module &M) { return false; auto &TM = TPC->getTM<TargetMachine>(); - if (!TM.Options.EmulatedTLS) + if (!TM.useEmulatedTLS()) return false; bool Changed = false; diff --git a/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp index 4b676a60a8cd..fa43d13b1b85 100644 --- a/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -43,14 +43,13 @@ extern char &MIRCanonicalizerID; #define DEBUG_TYPE "mir-canonicalizer" static cl::opt<unsigned> -CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), - cl::value_desc("N"), - cl::desc("Function number to canonicalize.")); + CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), + cl::value_desc("N"), + cl::desc("Function number to canonicalize.")); -static cl::opt<unsigned> -CanonicalizeBasicBlockNumber("canon-nth-basicblock", cl::Hidden, cl::init(~0u), - cl::value_desc("N"), - cl::desc("BasicBlock number to canonicalize.")); +static cl::opt<unsigned> CanonicalizeBasicBlockNumber( + "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"), + cl::desc("BasicBlock number to canonicalize.")); namespace { @@ -84,9 +83,9 @@ public: assert(type != RSE_Reg && "Expected a non-register type."); } - bool isReg() const { return type == RSE_Reg; } - bool isFrameIndex() const { return type == RSE_FrameIndex; } - bool isCandidate() const { return type == RSE_NewCandidate; } + bool isReg() const { return type == RSE_Reg; } + bool isFrameIndex() const { return type == RSE_FrameIndex; } + bool isCandidate() const { return type == RSE_NewCandidate; } VRType getType() const { return type; } unsigned getReg() const { @@ -115,23 +114,49 @@ static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { return RPOList; } -// Set a dummy vreg. 
We use this vregs register class to generate throw-away -// vregs that are used to skip vreg numbers so that vreg numbers line up. -static unsigned GetDummyVReg(const MachineFunction &MF) { - for (auto &MBB : MF) { - for (auto &MI : MBB) { - for (auto &MO : MI.operands()) { - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - return MO.getReg(); - } - } +static bool +rescheduleLexographically(std::vector<MachineInstr *> instructions, + MachineBasicBlock *MBB, + std::function<MachineBasicBlock::iterator()> getPos) { + + bool Changed = false; + using StringInstrPair = std::pair<std::string, MachineInstr *>; + std::vector<StringInstrPair> StringInstrMap; + + for (auto *II : instructions) { + std::string S; + raw_string_ostream OS(S); + II->print(OS); + OS.flush(); + + // Trim the assignment, or start from the begining in the case of a store. + const size_t i = S.find("="); + StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II}); + } + + llvm::sort(StringInstrMap.begin(), StringInstrMap.end(), + [](const StringInstrPair &a, const StringInstrPair &b) -> bool { + return (a.first < b.first); + }); + + for (auto &II : StringInstrMap) { + + LLVM_DEBUG({ + dbgs() << "Splicing "; + II.second->dump(); + dbgs() << " right before: "; + getPos()->dump(); + }); + + Changed = true; + MBB->splice(getPos(), MBB, II.second); } - return ~0U; + return Changed; } -static bool rescheduleCanonically(MachineBasicBlock *MBB) { +static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, + MachineBasicBlock *MBB) { bool Changed = false; @@ -153,15 +178,62 @@ static bool rescheduleCanonically(MachineBasicBlock *MBB) { Instructions.push_back(&MI); } + std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers; + std::vector<MachineInstr *> PseudoIdempotentInstructions; + std::vector<unsigned> PhysRegDefs; + for (auto *II : Instructions) { + for (unsigned i = 1; i < II->getNumOperands(); i++) { + MachineOperand &MO = II->getOperand(i); + if (!MO.isReg()) + continue; + + if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + + if (!MO.isDef()) + continue; + + PhysRegDefs.push_back(MO.getReg()); + } + } + for (auto *II : Instructions) { if (II->getNumOperands() == 0) continue; + if (II->mayLoadOrStore()) + continue; MachineOperand &MO = II->getOperand(0); if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; + if (!MO.isDef()) + continue; + + bool IsPseudoIdempotent = true; + for (unsigned i = 1; i < II->getNumOperands(); i++) { + + if (II->getOperand(i).isImm()) { + continue; + } + + if (II->getOperand(i).isReg()) { + if (!TargetRegisterInfo::isVirtualRegister(II->getOperand(i).getReg())) + if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) == + PhysRegDefs.end()) { + continue; + } + } - DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); + IsPseudoIdempotent = false; + break; + } + + if (IsPseudoIdempotent) { + PseudoIdempotentInstructions.push_back(II); + continue; + } + + LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); MachineInstr *Def = II; unsigned Distance = ~0U; @@ -194,9 +266,6 @@ static bool rescheduleCanonically(MachineBasicBlock *MBB) { if (DefI != BBE && UseI != BBE) break; - if ((&*BBI != Def) && (&*BBI != UseToBringDefCloserTo)) - continue; - if (&*BBI == Def) { DefI = BBI; continue; @@ -211,17 +280,80 @@ static bool rescheduleCanonically(MachineBasicBlock *MBB) { if (DefI == BBE || UseI == BBE) continue; - DEBUG({ + LLVM_DEBUG({ dbgs() << 
"Splicing "; DefI->dump(); dbgs() << " right before: "; UseI->dump(); }); + MultiUsers[UseToBringDefCloserTo].push_back(Def); Changed = true; MBB->splice(UseI, MBB, DefI); } + // Sort the defs for users of multiple defs lexographically. + for (const auto &E : MultiUsers) { + + auto UseI = + std::find_if(MBB->instr_begin(), MBB->instr_end(), + [&](MachineInstr &MI) -> bool { return &MI == E.first; }); + + if (UseI == MBB->instr_end()) + continue; + + LLVM_DEBUG( + dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";); + Changed |= rescheduleLexographically( + E.second, MBB, [&]() -> MachineBasicBlock::iterator { return UseI; }); + } + + PseudoIdempotentInstCount = PseudoIdempotentInstructions.size(); + LLVM_DEBUG( + dbgs() << "Rescheduling Idempotent Instructions Lexographically.";); + Changed |= rescheduleLexographically( + PseudoIdempotentInstructions, MBB, + [&]() -> MachineBasicBlock::iterator { return MBB->begin(); }); + + return Changed; +} + +static bool propagateLocalCopies(MachineBasicBlock *MBB) { + bool Changed = false; + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + std::vector<MachineInstr *> Copies; + for (MachineInstr &MI : MBB->instrs()) { + if (MI.isCopy()) + Copies.push_back(&MI); + } + + for (MachineInstr *MI : Copies) { + + if (!MI->getOperand(0).isReg()) + continue; + if (!MI->getOperand(1).isReg()) + continue; + + const unsigned Dst = MI->getOperand(0).getReg(); + const unsigned Src = MI->getOperand(1).getReg(); + + if (!TargetRegisterInfo::isVirtualRegister(Dst)) + continue; + if (!TargetRegisterInfo::isVirtualRegister(Src)) + continue; + if (MRI.getRegClass(Dst) != MRI.getRegClass(Src)) + continue; + + for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { + MachineOperand *MO = &*UI; + MO->setReg(Src); + Changed = true; + } + + MI->eraseFromParent(); + } + return Changed; } @@ -245,7 +377,8 @@ static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) { DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst); for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { - if (DoesMISideEffect) break; + if (DoesMISideEffect) + break; DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent()); } } @@ -253,7 +386,7 @@ static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) { if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect) continue; - DEBUG(dbgs() << "Found Candidate: "; MI->dump();); + LLVM_DEBUG(dbgs() << "Found Candidate: "; MI->dump();); Candidates.push_back(MI); } @@ -274,7 +407,7 @@ static void doCandidateWalk(std::vector<TypedVReg> &VRegs, RegQueue.pop(); if (TReg.isFrameIndex()) { - DEBUG(dbgs() << "Popping frame index.\n";); + LLVM_DEBUG(dbgs() << "Popping frame index.\n";); VRegs.push_back(TypedVReg(RSE_FrameIndex)); continue; } @@ -283,7 +416,7 @@ static void doCandidateWalk(std::vector<TypedVReg> &VRegs, unsigned Reg = TReg.getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { - DEBUG({ + LLVM_DEBUG({ dbgs() << "Popping vreg "; MRI.def_begin(Reg)->dump(); dbgs() << "\n"; @@ -295,7 +428,7 @@ static void doCandidateWalk(std::vector<TypedVReg> &VRegs, VRegs.push_back(TypedVReg(Reg)); } } else { - DEBUG(dbgs() << "Popping physreg.\n";); + LLVM_DEBUG(dbgs() << "Popping physreg.\n";); VRegs.push_back(TypedVReg(Reg)); continue; } @@ -311,7 +444,7 @@ static void doCandidateWalk(std::vector<TypedVReg> &VRegs, break; } - DEBUG({ + LLVM_DEBUG({ dbgs() << "\n========================\n"; dbgs() << "Visited MI: "; Def->dump(); @@ -323,7 +456,7 @@ static 
void doCandidateWalk(std::vector<TypedVReg> &VRegs, MachineOperand &MO = Def->getOperand(I); if (MO.isFI()) { - DEBUG(dbgs() << "Pushing frame index.\n";); + LLVM_DEBUG(dbgs() << "Pushing frame index.\n";); RegQueue.push(TypedVReg(RSE_FrameIndex)); } @@ -335,33 +468,56 @@ static void doCandidateWalk(std::vector<TypedVReg> &VRegs, } } -// TODO: Work to remove this in the future. One day when we have named vregs -// we should be able to form the canonical name based on some characteristic -// we see in that point of the expression tree (like if we were to name based -// on some sort of value numbering scheme). -static void SkipVRegs(unsigned &VRegGapIndex, MachineRegisterInfo &MRI, - const TargetRegisterClass *RC) { - const unsigned VR_GAP = (++VRegGapIndex * 1000); - - DEBUG({ - dbgs() << "Adjusting per-BB VR_GAP for BB" << VRegGapIndex << " to " - << VR_GAP << "\n"; - }); +namespace { +class NamedVRegCursor { + MachineRegisterInfo &MRI; + unsigned virtualVRegNumber; + +public: + NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI) { + unsigned VRegGapIndex = 0; + const unsigned VR_GAP = (++VRegGapIndex * 1000); + + unsigned I = MRI.createIncompleteVirtualRegister(); + const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; - unsigned I = MRI.createVirtualRegister(RC); - const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; - while (I != E) { - I = MRI.createVirtualRegister(RC); + virtualVRegNumber = E; } -} + + void SkipVRegs() { + unsigned VRegGapIndex = 1; + const unsigned VR_GAP = (++VRegGapIndex * 1000); + + unsigned I = virtualVRegNumber; + const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; + + virtualVRegNumber = E; + } + + unsigned getVirtualVReg() const { return virtualVRegNumber; } + + unsigned incrementVirtualVReg(unsigned incr = 1) { + virtualVRegNumber += incr; + return virtualVRegNumber; + } + + unsigned createVirtualRegister(const TargetRegisterClass *RC) { + std::string S; + raw_string_ostream OS(S); + OS << "namedVReg" << (virtualVRegNumber & ~0x80000000); + OS.flush(); + virtualVRegNumber++; + + return MRI.createVirtualRegister(RC, OS.str()); + } +}; +} // namespace static std::map<unsigned, unsigned> GetVRegRenameMap(const std::vector<TypedVReg> &VRegs, const std::vector<unsigned> &renamedInOtherBB, - MachineRegisterInfo &MRI, - const TargetRegisterClass *RC) { + MachineRegisterInfo &MRI, NamedVRegCursor &NVC) { std::map<unsigned, unsigned> VRegRenameMap; - unsigned LastRenameReg = MRI.createVirtualRegister(RC); bool FirstCandidate = true; for (auto &vreg : VRegs) { @@ -370,8 +526,9 @@ GetVRegRenameMap(const std::vector<TypedVReg> &VRegs, // (especially when comparing SelectionDAG to GlobalISel generated MIR) // that in the other file we are just getting an incoming vreg that comes // from a copy from a frame index. So it's safe to skip by one. - LastRenameReg = MRI.createVirtualRegister(RC); - DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";); + unsigned LastRenameReg = NVC.incrementVirtualVReg(); + (void)LastRenameReg; + LLVM_DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";); continue; } else if (vreg.isCandidate()) { @@ -380,20 +537,15 @@ GetVRegRenameMap(const std::vector<TypedVReg> &VRegs, // same vreg number making it more likely that the canonical walk from the // candidate insruction. We don't need to skip from the first candidate of // the BasicBlock because we already skip ahead several vregs for each BB. 
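// Illustrative sketch, not from the LLVM sources: the NamedVRegCursor
// above replaces the old throw-away-vreg trick with synthesized names.
// A rename such as
unsigned Renamed = NVC.createVirtualRegister(MRI.getRegClass(Reg));
// yields a virtual register whose name is "namedVReg" followed by the
// cursor's running counter with the high virtual-register bit masked
// off, e.g. namedVReg1352 (the exact number here is made up). Between
// basic blocks, SkipVRegs() jumps that counter ahead to a later multiple
// of 2000 so renames from different blocks end up in clearly separated
// numeric ranges.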
- while (LastRenameReg % 10) { - if (!FirstCandidate) break; - LastRenameReg = MRI.createVirtualRegister(RC); - - DEBUG({ - dbgs() << "Skipping rename for new candidate " << LastRenameReg - << "\n"; - }); - } + unsigned LastRenameReg = NVC.getVirtualVReg(); + if (FirstCandidate) + NVC.incrementVirtualVReg(LastRenameReg % 10); FirstCandidate = false; continue; } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) { - LastRenameReg = MRI.createVirtualRegister(RC); - DEBUG({ + unsigned LastRenameReg = NVC.incrementVirtualVReg(); + (void)LastRenameReg; + LLVM_DEBUG({ dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n"; }); continue; @@ -401,27 +553,27 @@ GetVRegRenameMap(const std::vector<TypedVReg> &VRegs, auto Reg = vreg.getReg(); if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) { - DEBUG(dbgs() << "Vreg " << Reg << " already renamed in other BB.\n";); + LLVM_DEBUG(dbgs() << "Vreg " << Reg + << " already renamed in other BB.\n";); continue; } - auto Rename = MRI.createVirtualRegister(MRI.getRegClass(Reg)); - LastRenameReg = Rename; + auto Rename = NVC.createVirtualRegister(MRI.getRegClass(Reg)); if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { - DEBUG(dbgs() << "Mapping vreg ";); + LLVM_DEBUG(dbgs() << "Mapping vreg ";); if (MRI.reg_begin(Reg) != MRI.reg_end()) { - DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); + LLVM_DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); } else { - DEBUG(dbgs() << Reg;); + LLVM_DEBUG(dbgs() << Reg;); } - DEBUG(dbgs() << " to ";); + LLVM_DEBUG(dbgs() << " to ";); if (MRI.reg_begin(Rename) != MRI.reg_end()) { - DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); + LLVM_DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); } else { - DEBUG(dbgs() << Rename;); + LLVM_DEBUG(dbgs() << Rename;); } - DEBUG(dbgs() << "\n";); + LLVM_DEBUG(dbgs() << "\n";); VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename)); } @@ -483,23 +635,25 @@ static bool doDefKillClear(MachineBasicBlock *MBB) { static bool runOnBasicBlock(MachineBasicBlock *MBB, std::vector<StringRef> &bbNames, std::vector<unsigned> &renamedInOtherBB, - unsigned &basicBlockNum, unsigned &VRegGapIndex) { + unsigned &basicBlockNum, unsigned &VRegGapIndex, + NamedVRegCursor &NVC) { if (CanonicalizeBasicBlockNumber != ~0U) { if (CanonicalizeBasicBlockNumber != basicBlockNum++) return false; - DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() << "\n";); + LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() + << "\n";); } if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) { - DEBUG({ + LLVM_DEBUG({ dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName() << "\n"; }); return false; } - DEBUG({ + LLVM_DEBUG({ dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; dbgs() << "\n\n================================================\n\n"; }); @@ -508,17 +662,18 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB, MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); - const unsigned DummyVReg = GetDummyVReg(MF); - const TargetRegisterClass *DummyRC = - (DummyVReg == ~0U) ? 
nullptr : MRI.getRegClass(DummyVReg); - if (!DummyRC) return false; - bbNames.push_back(MBB->getName()); - DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); + LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); - DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); - Changed |= rescheduleCanonically(MBB); - DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); + LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n"; + MBB->dump();); + Changed |= propagateLocalCopies(MBB); + LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump();); + + LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); + unsigned IdempotentInstCount = 0; + Changed |= rescheduleCanonically(IdempotentInstCount, MBB); + LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); std::vector<MachineInstr *> Candidates = populateCandidates(MBB); std::vector<MachineInstr *> VisitedMIs; @@ -543,7 +698,7 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB, if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) continue; - DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";); + LLVM_DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";); RegQueue.push(TypedVReg(MO.getReg())); } @@ -560,10 +715,10 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB, if (!MO.isReg() && !MO.isFI()) continue; - DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";); + LLVM_DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";); - RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) : - TypedVReg(RSE_FrameIndex)); + RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) + : TypedVReg(RSE_FrameIndex)); } doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB); @@ -574,15 +729,38 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB, if (VRegs.size() == 0) return Changed; - // Skip some vregs, so we can recon where we'll land next. - SkipVRegs(VRegGapIndex, MRI, DummyRC); - - auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, DummyRC); + auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, NVC); Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI); + + // Here we renumber the def vregs for the idempotent instructions from the top + // of the MachineBasicBlock so that they are named in the order that we sorted + // them alphabetically. Eventually we wont need SkipVRegs because we will use + // named vregs instead. 
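// Illustrative before/after, not from the LLVM sources: the
// propagateLocalCopies() helper earlier in this file forwards the source
// of a trivial virtual-to-virtual COPY (same register class) to every
// user of its destination and then erases the COPY. Schematically, with
// stand-in opcode and class names:
//
//   %1:gpr = COPY %0
//   %2:gpr = ADD_OP %1, %1
//
// becomes
//
//   %2:gpr = ADD_OP %0, %0
//
// which removes one source of incidental vreg-numbering differences
// ahead of the renaming performed above.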
+ NVC.SkipVRegs(); + + auto MII = MBB->begin(); + for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) { + MachineInstr &MI = *MII++; + Changed = true; + unsigned vRegToRename = MI.getOperand(0).getReg(); + auto Rename = NVC.createVirtualRegister(MRI.getRegClass(vRegToRename)); + + std::vector<MachineOperand *> RenameMOs; + for (auto &MO : MRI.reg_operands(vRegToRename)) { + RenameMOs.push_back(&MO); + } + + for (auto *MO : RenameMOs) { + MO->setReg(Rename); + } + } + Changed |= doDefKillClear(MBB); - DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); dbgs() << "\n";); - DEBUG(dbgs() << "\n\n================================================\n\n"); + LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); + dbgs() << "\n";); + LLVM_DEBUG( + dbgs() << "\n\n================================================\n\n"); return Changed; } @@ -592,22 +770,21 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { if (CanonicalizeFunctionNumber != ~0U) { if (CanonicalizeFunctionNumber != functionNum++) return false; - DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() << "\n";); + LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() + << "\n";); } // we need a valid vreg to create a vreg type for skipping all those // stray vreg numbers so reach alignment/canonical vreg values. - std::vector<MachineBasicBlock*> RPOList = GetRPOList(MF); + std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF); - DEBUG( - dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n"; - dbgs() << "\n\n================================================\n\n"; - dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; - for (auto MBB : RPOList) { - dbgs() << MBB->getName() << "\n"; - } - dbgs() << "\n\n================================================\n\n"; - ); + LLVM_DEBUG( + dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n"; + dbgs() << "\n\n================================================\n\n"; + dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; + for (auto MBB + : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs() + << "\n\n================================================\n\n";); std::vector<StringRef> BBNames; std::vector<unsigned> RenamedInOtherBB; @@ -617,9 +794,11 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; + MachineRegisterInfo &MRI = MF.getRegInfo(); + NamedVRegCursor NVC(MRI); for (auto MBB : RPOList) - Changed |= runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx); + Changed |= + runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx, NVC); return Changed; } - diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 6adb7f1288d7..da05c9a22785 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -179,23 +179,6 @@ static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type, return C; } -static Cursor maybeLexIntegerOrScalarType(Cursor C, MIToken &Token) { - if ((C.peek() != 'i' && C.peek() != 's' && C.peek() != 'p') || - !isdigit(C.peek(1))) - return None; - char Kind = C.peek(); - auto Range = C; - C.advance(); // Skip 'i', 's', or 'p' - while (isdigit(C.peek())) - C.advance(); - - Token.reset(Kind == 'i' - ? MIToken::IntegerType - : (Kind == 's' ? 
MIToken::ScalarType : MIToken::PointerType), - Range.upto(C)); - return C; -} - static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { return StringSwitch<MIToken::TokenKind>(Identifier) .Case("_", MIToken::underscore) @@ -211,6 +194,14 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("renamable", MIToken::kw_renamable) .Case("tied-def", MIToken::kw_tied_def) .Case("frame-setup", MIToken::kw_frame_setup) + .Case("frame-destroy", MIToken::kw_frame_destroy) + .Case("nnan", MIToken::kw_nnan) + .Case("ninf", MIToken::kw_ninf) + .Case("nsz", MIToken::kw_nsz) + .Case("arcp", MIToken::kw_arcp) + .Case("contract", MIToken::kw_contract) + .Case("afn", MIToken::kw_afn) + .Case("reassoc", MIToken::kw_reassoc) .Case("debug-location", MIToken::kw_debug_location) .Case("same_value", MIToken::kw_cfi_same_value) .Case("offset", MIToken::kw_cfi_offset) @@ -241,6 +232,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("dereferenceable", MIToken::kw_dereferenceable) .Case("invariant", MIToken::kw_invariant) .Case("align", MIToken::kw_align) + .Case("addrspace", MIToken::kw_addrspace) .Case("stack", MIToken::kw_stack) .Case("got", MIToken::kw_got) .Case("jump-table", MIToken::kw_jump_table) @@ -408,17 +400,38 @@ static bool isRegisterChar(char C) { return isIdentifierChar(C) && C != '.'; } -static Cursor maybeLexRegister(Cursor C, MIToken &Token) { - if (C.peek() != '%') +static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) { + Cursor Range = C; + C.advance(); // Skip '%' + while (isRegisterChar(C.peek())) + C.advance(); + Token.reset(MIToken::NamedVirtualRegister, Range.upto(C)) + .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' + return C; +} + +static Cursor maybeLexRegister(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + if (C.peek() != '%' && C.peek() != '$') + return None; + + if (C.peek() == '%') { + if (isdigit(C.peek(1))) + return lexVirtualRegister(C, Token); + + if (isRegisterChar(C.peek(1))) + return lexNamedVirtualRegister(C, Token); + return None; - if (isdigit(C.peek(1))) - return lexVirtualRegister(C, Token); + } + + assert(C.peek() == '$'); auto Range = C; - C.advance(); // Skip '%' + C.advance(); // Skip '$' while (isRegisterChar(C.peek())) C.advance(); Token.reset(MIToken::NamedRegister, Range.upto(C)) - .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' + .setStringValue(Range.upto(C).drop_front(1)); // Drop the '$' return C; } @@ -441,7 +454,7 @@ static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token, static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token, ErrorCallbackType ErrorCallback) { - if (C.peek() != '$') + if (C.peek() != '&') return None; return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1, ErrorCallback); @@ -620,8 +633,6 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, return C.remaining(); } - if (Cursor R = maybeLexIntegerOrScalarType(C, Token)) - return R.remaining(); if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) return R.remaining(); if (Cursor R = maybeLexIdentifier(C, Token)) @@ -640,7 +651,7 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, return R.remaining(); if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) return R.remaining(); - if (Cursor R = maybeLexRegister(C, Token)) + if (Cursor R = maybeLexRegister(C, Token, ErrorCallback)) return R.remaining(); if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) return R.remaining(); diff --git 
a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h index 0204d549d5d4..e21c71532f79 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -63,6 +63,14 @@ struct MIToken { kw_renamable, kw_tied_def, kw_frame_setup, + kw_frame_destroy, + kw_nnan, + kw_ninf, + kw_nsz, + kw_arcp, + kw_contract, + kw_afn, + kw_reassoc, kw_debug_location, kw_cfi_same_value, kw_cfi_offset, @@ -92,6 +100,7 @@ struct MIToken { kw_non_temporal, kw_invariant, kw_align, + kw_addrspace, kw_stack, kw_got, kw_jump_table, @@ -114,12 +123,10 @@ struct MIToken { // Identifier tokens Identifier, - IntegerType, NamedRegister, + NamedVirtualRegister, MachineBasicBlockLabel, MachineBasicBlock, - PointerType, - ScalarType, StackObject, FixedStackObject, NamedGlobalValue, @@ -168,7 +175,7 @@ public: bool isRegister() const { return Kind == NamedRegister || Kind == underscore || - Kind == VirtualRegister; + Kind == NamedVirtualRegister || Kind == VirtualRegister; } bool isRegisterFlag() const { diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 1a78ae3aad07..a61e7872f1ae 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -98,6 +98,18 @@ VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) { return *I.first->second; } +VRegInfo &PerFunctionMIParsingState::getVRegInfoNamed(StringRef RegName) { + assert(RegName != "" && "Expected named reg."); + + auto I = VRegInfosNamed.insert(std::make_pair(RegName.str(), nullptr)); + if (I.second) { + VRegInfo *Info = new (Allocator) VRegInfo; + Info->VReg = MF.getRegInfo().createIncompleteVirtualRegister(RegName); + I.first->second = Info; + } + return *I.first->second; +} + namespace { /// A wrapper struct around the 'MachineOperand' struct that includes a source @@ -182,6 +194,7 @@ public: bool parseNamedRegister(unsigned &Reg); bool parseVirtualRegister(VRegInfo *&Info); + bool parseNamedVirtualRegister(VRegInfo *&Info); bool parseRegister(unsigned &Reg, VRegInfo *&VRegInfo); bool parseRegisterFlag(unsigned &Flags); bool parseRegisterClassOrBank(VRegInfo &RegInfo); @@ -190,7 +203,7 @@ public: bool parseRegisterOperand(MachineOperand &Dest, Optional<unsigned> &TiedDefIdx, bool IsDef = false); bool parseImmediateOperand(MachineOperand &Dest); - bool parseIRConstant(StringRef::iterator Loc, StringRef Source, + bool parseIRConstant(StringRef::iterator Loc, StringRef StringValue, const Constant *&C); bool parseIRConstant(StringRef::iterator Loc, const Constant *&C); bool parseLowLevelType(StringRef::iterator Loc, LLT &Ty); @@ -209,7 +222,7 @@ public: bool parseJumpTableIndexOperand(MachineOperand &Dest); bool parseExternalSymbolOperand(MachineOperand &Dest); bool parseMDNode(MDNode *&Node); - bool parseDIExpression(MDNode *&Node); + bool parseDIExpression(MDNode *&Expr); bool parseMetadataOperand(MachineOperand &Dest); bool parseCFIOffset(int &Offset); bool parseCFIRegister(unsigned &Reg); @@ -228,6 +241,7 @@ public: Optional<unsigned> &TiedDefIdx); bool parseOffset(int64_t &Offset); bool parseAlignment(unsigned &Alignment); + bool parseAddrspace(unsigned &Addrspace); bool parseOperandsOffset(MachineOperand &Op); bool parseIRValue(const Value *&V); bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags); @@ -915,15 +929,43 @@ bool MIParser::verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands, continue; return error(Operands.empty() ? 
Token.location() : Operands.back().End, Twine("missing implicit register operand '") + - printImplicitRegisterFlag(I) + " %" + + printImplicitRegisterFlag(I) + " $" + getRegisterName(TRI, I.getReg()) + "'"); } return false; } bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { - if (Token.is(MIToken::kw_frame_setup)) { - Flags |= MachineInstr::FrameSetup; + // Allow frame and fast math flags for OPCODE + while (Token.is(MIToken::kw_frame_setup) || + Token.is(MIToken::kw_frame_destroy) || + Token.is(MIToken::kw_nnan) || + Token.is(MIToken::kw_ninf) || + Token.is(MIToken::kw_nsz) || + Token.is(MIToken::kw_arcp) || + Token.is(MIToken::kw_contract) || + Token.is(MIToken::kw_afn) || + Token.is(MIToken::kw_reassoc)) { + // Mine frame and fast math flags + if (Token.is(MIToken::kw_frame_setup)) + Flags |= MachineInstr::FrameSetup; + if (Token.is(MIToken::kw_frame_destroy)) + Flags |= MachineInstr::FrameDestroy; + if (Token.is(MIToken::kw_nnan)) + Flags |= MachineInstr::FmNoNans; + if (Token.is(MIToken::kw_ninf)) + Flags |= MachineInstr::FmNoInfs; + if (Token.is(MIToken::kw_nsz)) + Flags |= MachineInstr::FmNsz; + if (Token.is(MIToken::kw_arcp)) + Flags |= MachineInstr::FmArcp; + if (Token.is(MIToken::kw_contract)) + Flags |= MachineInstr::FmContract; + if (Token.is(MIToken::kw_afn)) + Flags |= MachineInstr::FmAfn; + if (Token.is(MIToken::kw_reassoc)) + Flags |= MachineInstr::FmReassoc; + lex(); } if (Token.isNot(MIToken::Identifier)) @@ -943,7 +985,18 @@ bool MIParser::parseNamedRegister(unsigned &Reg) { return false; } +bool MIParser::parseNamedVirtualRegister(VRegInfo *&Info) { + assert(Token.is(MIToken::NamedVirtualRegister) && "Expected NamedVReg token"); + StringRef Name = Token.stringValue(); + // TODO: Check that the VReg name is not the same as a physical register name. + // If it is, then print a warning (when warnings are implemented). + Info = &PFS.getVRegInfoNamed(Name); + return false; +} + bool MIParser::parseVirtualRegister(VRegInfo *&Info) { + if (Token.is(MIToken::NamedVirtualRegister)) + return parseNamedVirtualRegister(Info); assert(Token.is(MIToken::VirtualRegister) && "Needs VirtualRegister token"); unsigned ID; if (getUnsigned(ID)) @@ -959,6 +1012,7 @@ bool MIParser::parseRegister(unsigned &Reg, VRegInfo *&Info) { return false; case MIToken::NamedRegister: return parseNamedRegister(Reg); + case MIToken::NamedVirtualRegister: case MIToken::VirtualRegister: if (parseVirtualRegister(Info)) return true; @@ -1249,11 +1303,17 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) { } bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) { - if (Token.is(MIToken::ScalarType)) { + if (Token.range().front() == 's' || Token.range().front() == 'p') { + StringRef SizeStr = Token.range().drop_front(); + if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit)) + return error("expected integers after 's'/'p' type character"); + } + + if (Token.range().front() == 's') { Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue()); lex(); return false; - } else if (Token.is(MIToken::PointerType)) { + } else if (Token.range().front() == 'p') { const DataLayout &DL = MF.getDataLayout(); unsigned AS = APSInt(Token.range().drop_front()).getZExtValue(); Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); @@ -1264,38 +1324,60 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) { // Now we're looking for a vector. 
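// Illustrative MIR fragments, not from the LLVM sources: taken together,
// the MILexer and MIParser changes above let the parser accept lines
// such as the following (opcodes, register classes and the symbol name
// are stand-ins):
//
//   %sum:gpr32 = nnan ninf contract FADD_OP %a, %b
//   $eax = COPY %sum
//   CALL_OP &memset
//
// That is: fast-math and frame-destroy flags may prefix an opcode,
// virtual registers can be referenced by name, physical registers are
// now spelled with a '$' prefix, and external symbols use '&' where '$'
// was used before.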
if (Token.isNot(MIToken::less)) return error(Loc, - "expected unsized, pN, sN or <N x sM> for GlobalISel type"); - + "expected sN, pA, <M x sN>, or <M x pA> for GlobalISel type"); lex(); if (Token.isNot(MIToken::IntegerLiteral)) - return error(Loc, "expected <N x sM> for vctor type"); + return error(Loc, "expected <M x sN> or <M x pA> for vector type"); uint64_t NumElements = Token.integerValue().getZExtValue(); lex(); if (Token.isNot(MIToken::Identifier) || Token.stringValue() != "x") - return error(Loc, "expected '<N x sM>' for vector type"); + return error(Loc, "expected <M x sN> or <M x pA> for vector type"); lex(); - if (Token.isNot(MIToken::ScalarType)) - return error(Loc, "expected '<N x sM>' for vector type"); - uint64_t ScalarSize = APSInt(Token.range().drop_front()).getZExtValue(); + if (Token.range().front() != 's' && Token.range().front() != 'p') + return error(Loc, "expected <M x sN> or <M x pA> for vector type"); + StringRef SizeStr = Token.range().drop_front(); + if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit)) + return error("expected integers after 's'/'p' type character"); + + if (Token.range().front() == 's') + Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue()); + else if (Token.range().front() == 'p') { + const DataLayout &DL = MF.getDataLayout(); + unsigned AS = APSInt(Token.range().drop_front()).getZExtValue(); + Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); + } else + return error(Loc, "expected <M x sN> or <M x pA> for vector type"); lex(); if (Token.isNot(MIToken::greater)) - return error(Loc, "expected '<N x sM>' for vector type"); + return error(Loc, "expected <M x sN> or <M x pA> for vector type"); lex(); - Ty = LLT::vector(NumElements, ScalarSize); + Ty = LLT::vector(NumElements, Ty); return false; } bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) { - assert(Token.is(MIToken::IntegerType)); + assert(Token.is(MIToken::Identifier)); + StringRef TypeStr = Token.range(); + if (TypeStr.front() != 'i' && TypeStr.front() != 's' && + TypeStr.front() != 'p') + return error( + "a typed immediate operand should start with one of 'i', 's', or 'p'"); + StringRef SizeStr = Token.range().drop_front(); + if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit)) + return error("expected integers after 'i'/'s'/'p' type character"); + auto Loc = Token.location(); lex(); - if (Token.isNot(MIToken::IntegerLiteral)) - return error("expected an integer literal"); + if (Token.isNot(MIToken::IntegerLiteral)) { + if (Token.isNot(MIToken::Identifier) || + !(Token.range() == "true" || Token.range() == "false")) + return error("expected an integer literal"); + } const Constant *C = nullptr; if (parseIRConstant(Loc, C)) return true; @@ -1876,13 +1958,11 @@ bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) { bool MIParser::parseCustomRegisterMaskOperand(MachineOperand &Dest) { assert(Token.stringValue() == "CustomRegMask" && "Expected a custom RegMask"); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - assert(TRI && "Expected target register info"); lex(); if (expectAndConsume(MIToken::lparen)) return true; - uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs()); + uint32_t *Mask = MF.allocateRegMask(); while (true) { if (Token.isNot(MIToken::NamedRegister)) return error("expected a named register"); @@ -1905,9 +1985,7 @@ bool MIParser::parseCustomRegisterMaskOperand(MachineOperand &Dest) { bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) { assert(Token.is(MIToken::kw_liveout)); - 
const auto *TRI = MF.getSubtarget().getRegisterInfo(); - assert(TRI && "Expected target register info"); - uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs()); + uint32_t *Mask = MF.allocateRegMask(); lex(); if (expectAndConsume(MIToken::lparen)) return true; @@ -1946,11 +2024,10 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, case MIToken::underscore: case MIToken::NamedRegister: case MIToken::VirtualRegister: + case MIToken::NamedVirtualRegister: return parseRegisterOperand(Dest, TiedDefIdx); case MIToken::IntegerLiteral: return parseImmediateOperand(Dest); - case MIToken::IntegerType: - return parseTypedImmediateOperand(Dest); case MIToken::kw_half: case MIToken::kw_float: case MIToken::kw_double: @@ -2011,8 +2088,10 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, Dest = MachineOperand::CreateRegMask(RegMask); lex(); break; - } else + } else if (Token.stringValue() == "CustomRegMask") { return parseCustomRegisterMaskOperand(Dest); + } else + return parseTypedImmediateOperand(Dest); default: // FIXME: Parse the MCSymbol machine operand. return error("expected a machine operand"); @@ -2091,6 +2170,17 @@ bool MIParser::parseAlignment(unsigned &Alignment) { return false; } +bool MIParser::parseAddrspace(unsigned &Addrspace) { + assert(Token.is(MIToken::kw_addrspace)); + lex(); + if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned()) + return error("expected an integer literal after 'addrspace'"); + if (getUnsigned(Addrspace)) + return true; + lex(); + return false; +} + bool MIParser::parseOperandsOffset(MachineOperand &Op) { int64_t Offset = 0; if (parseOffset(Offset)) @@ -2402,6 +2492,10 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { if (parseAlignment(BaseAlignment)) return true; break; + case MIToken::kw_addrspace: + if (parseAddrspace(Ptr.AddrSpace)) + return true; + break; case MIToken::md_tbaa: lex(); if (parseMDNode(AAInfo.TBAA)) diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h index 2307881068ef..b06ceb21b740 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h @@ -56,6 +56,7 @@ struct PerFunctionMIParsingState { DenseMap<unsigned, MachineBasicBlock *> MBBSlots; DenseMap<unsigned, VRegInfo*> VRegInfos; + StringMap<VRegInfo*> VRegInfosNamed; DenseMap<unsigned, int> FixedStackObjectSlots; DenseMap<unsigned, int> StackObjectSlots; DenseMap<unsigned, unsigned> ConstantPoolSlots; @@ -66,7 +67,8 @@ struct PerFunctionMIParsingState { const Name2RegClassMap &Names2RegClasses, const Name2RegBankMap &Names2RegBanks); - VRegInfo &getVRegInfo(unsigned VReg); + VRegInfo &getVRegInfo(unsigned Num); + VRegInfo &getVRegInfoNamed(StringRef RegName); }; /// Parse the machine basic block definitions, and skip the machine diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 7d8e62736a34..3d2db97acb48 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -122,8 +122,9 @@ public: const yaml::StringValue &RegisterSource, bool IsRestored, int FrameIdx); + template <typename T> bool parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS, - const yaml::MachineStackObject &Object, + const T &Object, int FrameIdx); bool initializeConstantPool(PerFunctionMIParsingState &PFS, @@ -237,7 +238,7 @@ std::unique_ptr<Module> MIRParserImpl::parseIRModule() { 
dyn_cast_or_null<yaml::BlockScalarNode>(In.getCurrentNode())) { SMDiagnostic Error; M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error, - Context, &IRSlots); + Context, &IRSlots, /*UpgradeDebugInfo=*/false); if (!M) { reportDiagnostic(diagFromBlockStringDiag(Error, BSN->getSourceRange())); return nullptr; @@ -362,6 +363,8 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, MachineFunctionProperties::Property::RegBankSelected); if (YamlMF.Selected) MF.getProperties().set(MachineFunctionProperties::Property::Selected); + if (YamlMF.FailedISel) + MF.getProperties().set(MachineFunctionProperties::Property::FailedISel); PerFunctionMIParsingState PFS(MF, SM, IRSlots, Names2RegClasses, Names2RegBanks); @@ -417,6 +420,8 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, computeFunctionProperties(MF); + MF.getSubtarget().mirFileLoaded(MF); + MF.verify(); return false; } @@ -508,13 +513,12 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS, MachineRegisterInfo &MRI = MF.getRegInfo(); bool Error = false; // Create VRegs - for (auto P : PFS.VRegInfos) { - const VRegInfo &Info = *P.second; + auto populateVRegInfo = [&] (const VRegInfo &Info, Twine Name) { unsigned Reg = Info.VReg; switch (Info.Kind) { case VRegInfo::UNKNOWN: error(Twine("Cannot determine class/bank of virtual register ") + - Twine(P.first) + " in function '" + MF.getName() + "'"); + Name + " in function '" + MF.getName() + "'"); Error = true; break; case VRegInfo::NORMAL: @@ -528,6 +532,17 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS, MRI.setRegBank(Reg, *Info.D.RegBank); break; } + }; + + for (auto I = PFS.VRegInfosNamed.begin(), E = PFS.VRegInfosNamed.end(); + I != E; I++) { + const VRegInfo &Info = *I->second; + populateVRegInfo(Info, Twine(I->first())); + } + + for (auto P : PFS.VRegInfos) { + const VRegInfo &Info = *P.second; + populateVRegInfo(Info, Twine(P.first)); } // Compute MachineRegisterInfo::UsedPhysRegMask @@ -568,6 +583,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment); MFI.setHasVAStart(YamlMFI.HasVAStart); MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc); + MFI.setLocalFrameSize(YamlMFI.LocalFrameSize); if (!YamlMFI.SavePoint.Value.empty()) { MachineBasicBlock *MBB = nullptr; if (parseMBBReference(PFS, MBB, YamlMFI.SavePoint)) @@ -601,6 +617,8 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister, Object.CalleeSavedRestored, ObjectIdx)) return true; + if (parseStackObjectsDebugInfo(PFS, Object, ObjectIdx)) + return true; } // Initialize the ordinary frame objects. @@ -685,11 +703,11 @@ static bool typecheckMDNode(T *&Result, MDNode *Node, return false; } +template <typename T> bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS, - const yaml::MachineStackObject &Object, int FrameIdx) { + const T &Object, int FrameIdx) { // Debug information can only be attached to stack objects; Fixed stack // objects aren't supported. 
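// Illustrative YAML, not from the LLVM sources: the comment retained
// above predates this change. With parseStackObjectsDebugInfo templated
// over the YAML object type, a fixed stack slot can now carry the same
// debug fields as an ordinary stack object, roughly:
//
//   fixedStack:
//     - { id: 0, offset: -8, size: 8, alignment: 8,
//         debug-info-variable: '!11',
//         debug-info-expression: '!DIExpression()',
//         debug-info-location: '!12' }
//
// The metadata ids, sizes and offsets here are stand-ins; the field
// names follow the ones already used for ordinary stack objects.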
- assert(FrameIdx >= 0 && "Expected a stack object frame index"); MDNode *Var = nullptr, *Expr = nullptr, *Loc = nullptr; if (parseMDNode(PFS, Var, Object.DebugVar) || parseMDNode(PFS, Expr, Object.DebugExpr) || @@ -704,7 +722,7 @@ bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS, typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) || typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this)) return true; - PFS.MF.setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc); + PFS.MF.setVariableDbgInfo(DIVar, DIExpr, FrameIdx, DILoc); return false; } diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp index f91cca6e4e50..bf8cd1489ec5 100644 --- a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" @@ -157,14 +156,10 @@ public: void print(const MachineBasicBlock &MBB); void print(const MachineInstr &MI); - void printIRValueReference(const Value &V); void printStackObjectReference(int FrameIndex); void print(const MachineInstr &MI, unsigned OpIdx, const TargetRegisterInfo *TRI, bool ShouldPrintRegisterTies, LLT TypeToPrint, bool PrintDef = true); - void print(const LLVMContext &Context, const TargetInstrInfo &TII, - const MachineMemOperand &Op); - void printSyncScope(const LLVMContext &Context, SyncScope::ID SSID); }; } // end namespace llvm @@ -207,6 +202,8 @@ void MIRPrinter::print(const MachineFunction &MF) { MachineFunctionProperties::Property::RegBankSelected); YamlMF.Selected = MF.getProperties().hasProperty( MachineFunctionProperties::Property::Selected); + YamlMF.FailedISel = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel); convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo()); ModuleSlotTracker MST(MF.getFunction().getParent()); @@ -259,6 +256,21 @@ static void printRegClassOrBank(unsigned Reg, yaml::StringValue &Dest, OS << printRegClassOrBank(Reg, RegInfo, TRI); } +template <typename T> +static void +printStackObjectDbgInfo(const MachineFunction::VariableDbgInfo &DebugVar, + T &Object, ModuleSlotTracker &MST) { + std::array<std::string *, 3> Outputs{{&Object.DebugVar.Value, + &Object.DebugExpr.Value, + &Object.DebugLoc.Value}}; + std::array<const Metadata *, 3> Metas{{DebugVar.Var, + DebugVar.Expr, + DebugVar.Loc}}; + for (unsigned i = 0; i < 3; ++i) { + raw_string_ostream StrOS(*Outputs[i]); + Metas[i]->printAsOperand(StrOS, MST); + } +} void MIRPrinter::convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo, @@ -270,6 +282,8 @@ void MIRPrinter::convert(yaml::MachineFunction &MF, unsigned Reg = TargetRegisterInfo::index2VirtReg(I); yaml::VirtualRegisterDefinition VReg; VReg.ID = I; + if (RegInfo.getVRegName(Reg) != "") + continue; ::printRegClassOrBank(Reg, VReg.Class, RegInfo, TRI); unsigned PreferredReg = RegInfo.getSimpleHint(Reg); if (PreferredReg) @@ -316,6 +330,7 @@ void MIRPrinter::convert(ModuleSlotTracker &MST, YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment(); YamlMFI.HasVAStart = MFI.hasVAStart(); YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc(); + YamlMFI.LocalFrameSize = MFI.getLocalFrameSize(); if (MFI.getSavePoint()) { raw_string_ostream StrOS(YamlMFI.SavePoint.Value); StrOS << 
printMBBReference(*MFI.getSavePoint()); @@ -421,19 +436,12 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, assert(StackObjectInfo != StackObjectOperandMapping.end() && "Invalid stack object index"); const FrameIndexOperand &StackObject = StackObjectInfo->second; - assert(!StackObject.IsFixed && "Expected a non-fixed stack object"); - auto &Object = YMF.StackObjects[StackObject.ID]; - { - raw_string_ostream StrOS(Object.DebugVar.Value); - DebugVar.Var->printAsOperand(StrOS, MST); - } - { - raw_string_ostream StrOS(Object.DebugExpr.Value); - DebugVar.Expr->printAsOperand(StrOS, MST); - } - { - raw_string_ostream StrOS(Object.DebugLoc.Value); - DebugVar.Loc->printAsOperand(StrOS, MST); + if (StackObject.IsFixed) { + auto &Object = YMF.FixedStackObjects[StackObject.ID]; + printStackObjectDbgInfo(DebugVar, Object, MST); + } else { + auto &Object = YMF.StackObjects[StackObject.ID]; + printStackObjectDbgInfo(DebugVar, Object, MST); } } } @@ -670,6 +678,23 @@ void MIPrinter::print(const MachineInstr &MI) { OS << " = "; if (MI.getFlag(MachineInstr::FrameSetup)) OS << "frame-setup "; + if (MI.getFlag(MachineInstr::FrameDestroy)) + OS << "frame-destroy "; + if (MI.getFlag(MachineInstr::FmNoNans)) + OS << "nnan "; + if (MI.getFlag(MachineInstr::FmNoInfs)) + OS << "ninf "; + if (MI.getFlag(MachineInstr::FmNsz)) + OS << "nsz "; + if (MI.getFlag(MachineInstr::FmArcp)) + OS << "arcp "; + if (MI.getFlag(MachineInstr::FmContract)) + OS << "contract "; + if (MI.getFlag(MachineInstr::FmAfn)) + OS << "afn "; + if (MI.getFlag(MachineInstr::FmReassoc)) + OS << "reassoc "; + OS << TII->getName(MI.getOpcode()); if (I < E) OS << ' '; @@ -683,46 +708,27 @@ void MIPrinter::print(const MachineInstr &MI) { NeedComma = true; } - if (MI.getDebugLoc()) { + if (const DebugLoc &DL = MI.getDebugLoc()) { if (NeedComma) OS << ','; OS << " debug-location "; - MI.getDebugLoc()->printAsOperand(OS, MST); + DL->printAsOperand(OS, MST); } if (!MI.memoperands_empty()) { OS << " :: "; const LLVMContext &Context = MF->getFunction().getContext(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); bool NeedComma = false; for (const auto *Op : MI.memoperands()) { if (NeedComma) OS << ", "; - print(Context, *TII, *Op); + Op->print(OS, MST, SSNs, Context, &MFI, TII); NeedComma = true; } } } -void MIPrinter::printIRValueReference(const Value &V) { - if (isa<GlobalValue>(V)) { - V.printAsOperand(OS, /*PrintType=*/false, MST); - return; - } - if (isa<Constant>(V)) { - // Machine memory operands can load/store to/from constant value pointers. 
- OS << '`'; - V.printAsOperand(OS, /*PrintType=*/true, MST); - OS << '`'; - return; - } - OS << "%ir."; - if (V.hasName()) { - printLLVMNameWithoutPrefix(OS, V.getName()); - return; - } - MachineOperand::printIRSlotNumber(OS, MST.getLocalSlot(&V)); -} - void MIPrinter::printStackObjectReference(int FrameIndex) { auto ObjectInfo = StackObjectOperandMapping.find(FrameIndex); assert(ObjectInfo != StackObjectOperandMapping.end() && @@ -741,7 +747,7 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, case MachineOperand::MO_Immediate: if (MI.isOperandSubregIdx(OpIdx)) { MachineOperand::printTargetFlags(OS, Op); - MachineOperand::printSubregIdx(OS, Op.getImm(), TRI); + MachineOperand::printSubRegIdx(OS, Op.getImm(), TRI); break; } LLVM_FALLTHROUGH; @@ -765,8 +771,8 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef()) TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx); const TargetIntrinsicInfo *TII = MI.getMF()->getTarget().getIntrinsicInfo(); - Op.print(OS, MST, TypeToPrint, PrintDef, ShouldPrintRegisterTies, - TiedOperandIdx, TRI, TII); + Op.print(OS, MST, TypeToPrint, PrintDef, /*IsStandalone=*/false, + ShouldPrintRegisterTies, TiedOperandIdx, TRI, TII); break; } case MachineOperand::MO_FrameIndex: @@ -783,132 +789,6 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, } } -static const char *getTargetMMOFlagName(const TargetInstrInfo &TII, - unsigned TMMOFlag) { - auto Flags = TII.getSerializableMachineMemOperandTargetFlags(); - for (const auto &I : Flags) { - if (I.first == TMMOFlag) { - return I.second; - } - } - return nullptr; -} - -void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII, - const MachineMemOperand &Op) { - OS << '('; - if (Op.isVolatile()) - OS << "volatile "; - if (Op.isNonTemporal()) - OS << "non-temporal "; - if (Op.isDereferenceable()) - OS << "dereferenceable "; - if (Op.isInvariant()) - OS << "invariant "; - if (Op.getFlags() & MachineMemOperand::MOTargetFlag1) - OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag1) - << "\" "; - if (Op.getFlags() & MachineMemOperand::MOTargetFlag2) - OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag2) - << "\" "; - if (Op.getFlags() & MachineMemOperand::MOTargetFlag3) - OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag3) - << "\" "; - - assert((Op.isLoad() || Op.isStore()) && "machine memory operand must be a load or store (or both)"); - if (Op.isLoad()) - OS << "load "; - if (Op.isStore()) - OS << "store "; - - printSyncScope(Context, Op.getSyncScopeID()); - - if (Op.getOrdering() != AtomicOrdering::NotAtomic) - OS << toIRString(Op.getOrdering()) << ' '; - if (Op.getFailureOrdering() != AtomicOrdering::NotAtomic) - OS << toIRString(Op.getFailureOrdering()) << ' '; - - OS << Op.getSize(); - if (const Value *Val = Op.getValue()) { - OS << ((Op.isLoad() && Op.isStore()) ? " on " - : Op.isLoad() ? " from " : " into "); - printIRValueReference(*Val); - } else if (const PseudoSourceValue *PVal = Op.getPseudoValue()) { - OS << ((Op.isLoad() && Op.isStore()) ? " on " - : Op.isLoad() ? 
" from " : " into "); - assert(PVal && "Expected a pseudo source value"); - switch (PVal->kind()) { - case PseudoSourceValue::Stack: - OS << "stack"; - break; - case PseudoSourceValue::GOT: - OS << "got"; - break; - case PseudoSourceValue::JumpTable: - OS << "jump-table"; - break; - case PseudoSourceValue::ConstantPool: - OS << "constant-pool"; - break; - case PseudoSourceValue::FixedStack: - printStackObjectReference( - cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex()); - break; - case PseudoSourceValue::GlobalValueCallEntry: - OS << "call-entry "; - cast<GlobalValuePseudoSourceValue>(PVal)->getValue()->printAsOperand( - OS, /*PrintType=*/false, MST); - break; - case PseudoSourceValue::ExternalSymbolCallEntry: - OS << "call-entry $"; - printLLVMNameWithoutPrefix( - OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol()); - break; - case PseudoSourceValue::TargetCustom: - llvm_unreachable("TargetCustom pseudo source values are not supported"); - break; - } - } - MachineOperand::printOperandOffset(OS, Op.getOffset()); - if (Op.getBaseAlignment() != Op.getSize()) - OS << ", align " << Op.getBaseAlignment(); - auto AAInfo = Op.getAAInfo(); - if (AAInfo.TBAA) { - OS << ", !tbaa "; - AAInfo.TBAA->printAsOperand(OS, MST); - } - if (AAInfo.Scope) { - OS << ", !alias.scope "; - AAInfo.Scope->printAsOperand(OS, MST); - } - if (AAInfo.NoAlias) { - OS << ", !noalias "; - AAInfo.NoAlias->printAsOperand(OS, MST); - } - if (Op.getRanges()) { - OS << ", !range "; - Op.getRanges()->printAsOperand(OS, MST); - } - OS << ')'; -} - -void MIPrinter::printSyncScope(const LLVMContext &Context, SyncScope::ID SSID) { - switch (SSID) { - case SyncScope::System: { - break; - } - default: { - if (SSNs.empty()) - Context.getSyncScopeNames(SSNs); - - OS << "syncscope(\""; - PrintEscapedString(SSNs[SSID], OS); - OS << "\") "; - break; - } - } -} - void llvm::printMIR(raw_ostream &OS, const Module &M) { yaml::Output Out(OS); Out << const_cast<Module &>(M); diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index cd67449e3acf..38e8369dc739 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -173,7 +174,7 @@ MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) { const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); iterator E = end(); - while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue() || + while (I != E && (I->isPHI() || I->isPosition() || I->isDebugInstr() || TII->isBasicBlockPrologue(*I))) ++I; // FIXME: This needs to change if we wish to bundle labels / dbg_values @@ -186,7 +187,7 @@ MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) { MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { iterator B = begin(), E = end(), I = E; - while (I != B && ((--I)->isTerminator() || I->isDebugValue())) + while (I != B && ((--I)->isTerminator() || I->isDebugInstr())) ; /*noop */ while (I != E && !I->isTerminator()) ++I; @@ -195,7 +196,7 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() { instr_iterator B = 
instr_begin(), E = instr_end(), I = E; - while (I != B && ((--I)->isTerminator() || I->isDebugValue())) + while (I != B && ((--I)->isTerminator() || I->isDebugInstr())) ; /*noop */ while (I != E && !I->isTerminator()) ++I; @@ -213,7 +214,7 @@ MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() { while (I != B) { --I; // Return instruction that starts a bundle. - if (I->isDebugValue() || I->isInsideBundle()) + if (I->isDebugInstr() || I->isInsideBundle()) continue; return I; } @@ -259,8 +260,8 @@ std::string MachineBasicBlock::getFullName() const { return Name; } -void MachineBasicBlock::print(raw_ostream &OS, const SlotIndexes *Indexes) - const { +void MachineBasicBlock::print(raw_ostream &OS, const SlotIndexes *Indexes, + bool IsStandalone) const { const MachineFunction *MF = getParent(); if (!MF) { OS << "Can't print out MachineBasicBlock because parent MachineFunction" @@ -270,11 +271,13 @@ void MachineBasicBlock::print(raw_ostream &OS, const SlotIndexes *Indexes) const Function &F = MF->getFunction(); const Module *M = F.getParent(); ModuleSlotTracker MST(M); - print(OS, MST, Indexes); + MST.incorporateFunction(F); + print(OS, MST, Indexes, IsStandalone); } void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, - const SlotIndexes *Indexes) const { + const SlotIndexes *Indexes, + bool IsStandalone) const { const MachineFunction *MF = getParent(); if (!MF) { OS << "Can't print out MachineBasicBlock because parent MachineFunction" @@ -285,70 +288,143 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (Indexes) OS << Indexes->getMBBStartIdx(this) << '\t'; - OS << printMBBReference(*this) << ": "; - - const char *Comma = ""; - if (const BasicBlock *LBB = getBasicBlock()) { - OS << Comma << "derived from LLVM BB "; - LBB->printAsOperand(OS, /*PrintType=*/false, MST); - Comma = ", "; + OS << "bb." << getNumber(); + bool HasAttributes = false; + if (const auto *BB = getBasicBlock()) { + if (BB->hasName()) { + OS << "." << BB->getName(); + } else { + HasAttributes = true; + OS << " ("; + int Slot = MST.getLocalSlot(BB); + if (Slot == -1) + OS << "<ir-block badref>"; + else + OS << (Twine("%ir-block.") + Twine(Slot)).str(); + } } - if (isEHPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; } - if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; } - if (Alignment) - OS << Comma << "Align " << Alignment << " (" << (1u << Alignment) - << " bytes)"; - OS << '\n'; + if (hasAddressTaken()) { + OS << (HasAttributes ? ", " : " ("); + OS << "address-taken"; + HasAttributes = true; + } + if (isEHPad()) { + OS << (HasAttributes ? ", " : " ("); + OS << "landing-pad"; + HasAttributes = true; + } + if (getAlignment()) { + OS << (HasAttributes ? ", " : " ("); + OS << "align " << getAlignment(); + HasAttributes = true; + } + if (HasAttributes) + OS << ")"; + OS << ":\n"; const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - if (!livein_empty()) { + const MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetInstrInfo &TII = *getParent()->getSubtarget().getInstrInfo(); + bool HasLineAttributes = false; + + // Print the preds of this block according to the CFG. + if (!pred_empty() && IsStandalone) { if (Indexes) OS << '\t'; - OS << " Live Ins:"; - for (const auto &LI : LiveIns) { - OS << ' ' << printReg(LI.PhysReg, TRI); - if (!LI.LaneMask.all()) - OS << ':' << PrintLaneMask(LI.LaneMask); + // Don't indent(2), align with previous line attributes. 
+ OS << "; predecessors: "; + for (auto I = pred_begin(), E = pred_end(); I != E; ++I) { + if (I != pred_begin()) + OS << ", "; + OS << printMBBReference(**I); } OS << '\n'; + HasLineAttributes = true; } - // Print the preds of this block according to the CFG. - if (!pred_empty()) { + + if (!succ_empty()) { if (Indexes) OS << '\t'; - OS << " Predecessors according to CFG:"; - for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI) - OS << " " << printMBBReference(*(*PI)); + // Print the successors + OS.indent(2) << "successors: "; + for (auto I = succ_begin(), E = succ_end(); I != E; ++I) { + if (I != succ_begin()) + OS << ", "; + OS << printMBBReference(**I); + if (!Probs.empty()) + OS << '(' + << format("0x%08" PRIx32, getSuccProbability(I).getNumerator()) + << ')'; + } + if (!Probs.empty() && IsStandalone) { + // Print human readable probabilities as comments. + OS << "; "; + for (auto I = succ_begin(), E = succ_end(); I != E; ++I) { + const BranchProbability &BP = *getProbabilityIterator(I); + if (I != succ_begin()) + OS << ", "; + OS << printMBBReference(**I) << '(' + << format("%.2f%%", + rint(((double)BP.getNumerator() / BP.getDenominator()) * + 100.0 * 100.0) / + 100.0) + << ')'; + } + } + OS << '\n'; + HasLineAttributes = true; } - for (auto &I : instrs()) { + if (!livein_empty() && MRI.tracksLiveness()) { + if (Indexes) OS << '\t'; + OS.indent(2) << "liveins: "; + + bool First = true; + for (const auto &LI : liveins()) { + if (!First) + OS << ", "; + First = false; + OS << printReg(LI.PhysReg, TRI); + if (!LI.LaneMask.all()) + OS << ":0x" << PrintLaneMask(LI.LaneMask); + } + HasLineAttributes = true; + } + + if (HasLineAttributes) + OS << '\n'; + + bool IsInBundle = false; + for (const MachineInstr &MI : instrs()) { if (Indexes) { - if (Indexes->hasIndex(I)) - OS << Indexes->getInstructionIndex(I); + if (Indexes->hasIndex(MI)) + OS << Indexes->getInstructionIndex(MI); OS << '\t'; } - OS << '\t'; - if (I.isInsideBundle()) - OS << " * "; - I.print(OS, MST); - } - // Print the successors of this block according to the CFG. - if (!succ_empty()) { - if (Indexes) OS << '\t'; - OS << " Successors according to CFG:"; - for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) { - OS << " " << printMBBReference(*(*SI)); - if (!Probs.empty()) - OS << '(' << *getProbabilityIterator(SI) << ')'; + if (IsInBundle && !MI.isInsideBundle()) { + OS.indent(2) << "}\n"; + IsInBundle = false; + } + + OS.indent(IsInBundle ? 
4 : 2); + MI.print(OS, MST, IsStandalone, /*SkipOpers=*/false, /*SkipDebugLoc=*/false, + /*AddNewLine=*/false, &TII); + + if (!IsInBundle && MI.getFlag(MachineInstr::BundledSucc)) { + OS << " {"; + IsInBundle = true; } OS << '\n'; } - if (IrrLoopHeaderWeight) { + + if (IsInBundle) + OS.indent(2) << "}\n"; + + if (IrrLoopHeaderWeight && IsStandalone) { if (Indexes) OS << '\t'; - OS << " Irreducible loop header weight: " - << IrrLoopHeaderWeight.getValue(); - OS << '\n'; + OS.indent(2) << "; Irreducible loop header weight: " + << IrrLoopHeaderWeight.getValue() << '\n'; } } @@ -382,10 +458,10 @@ bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const { } void MachineBasicBlock::sortUniqueLiveIns() { - std::sort(LiveIns.begin(), LiveIns.end(), - [](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) { - return LI0.PhysReg < LI1.PhysReg; - }); + llvm::sort(LiveIns.begin(), LiveIns.end(), + [](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) { + return LI0.PhysReg < LI1.PhysReg; + }); // Liveins are sorted by physreg now we can merge their lanemasks. LiveInVector::const_iterator I = LiveIns.begin(); LiveInVector::const_iterator J; @@ -583,6 +659,25 @@ void MachineBasicBlock::addSuccessorWithoutProb(MachineBasicBlock *Succ) { Succ->addPredecessor(this); } +void MachineBasicBlock::splitSuccessor(MachineBasicBlock *Old, + MachineBasicBlock *New, + bool NormalizeSuccProbs) { + succ_iterator OldI = llvm::find(successors(), Old); + assert(OldI != succ_end() && "Old is not a successor of this block!"); + assert(llvm::find(successors(), New) == succ_end() && + "New is already a successor of this block!"); + + // Add a new successor with equal probability as the original one. Note + // that we directly copy the probability using the iterator rather than + // getting a potentially synthetic probability computed when unknown. This + // preserves the probabilities as-is and then we can renormalize them and + // query them effectively afterward. + addSuccessor(New, Probs.empty() ? 
BranchProbability::getUnknown() + : *getProbabilityIterator(OldI)); + if (NormalizeSuccProbs) + normalizeSuccProbs(); +} + void MachineBasicBlock::removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs) { succ_iterator I = find(Successors, Succ); @@ -779,9 +874,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); MF->insert(std::next(MachineFunction::iterator(this)), NMBB); - DEBUG(dbgs() << "Splitting critical edge: " << printMBBReference(*this) - << " -- " << printMBBReference(*NMBB) << " -- " - << printMBBReference(*Succ) << '\n'); + LLVM_DEBUG(dbgs() << "Splitting critical edge: " << printMBBReference(*this) + << " -- " << printMBBReference(*NMBB) << " -- " + << printMBBReference(*Succ) << '\n'); LiveIntervals *LIS = P.getAnalysisIfAvailable<LiveIntervals>(); SlotIndexes *Indexes = P.getAnalysisIfAvailable<SlotIndexes>(); @@ -810,7 +905,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, if (TargetRegisterInfo::isPhysicalRegister(Reg) || LV->getVarInfo(Reg).removeKill(*MI)) { KilledRegs.push_back(Reg); - DEBUG(dbgs() << "Removing terminator kill: " << *MI); + LLVM_DEBUG(dbgs() << "Removing terminator kill: " << *MI); OI->setIsKill(false); } } @@ -901,7 +996,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) LV->getVarInfo(Reg).Kills.push_back(&*I); - DEBUG(dbgs() << "Restored terminator kill: " << *I); + LLVM_DEBUG(dbgs() << "Restored terminator kill: " << *I); break; } } @@ -1034,8 +1129,8 @@ bool MachineBasicBlock::canSplitCriticalEdge( // case that we can't handle. Since this never happens in properly optimized // code, just skip those edges. if (TBB && TBB == FBB) { - DEBUG(dbgs() << "Won't split critical edge after degenerate " - << printMBBReference(*this) << '\n'); + LLVM_DEBUG(dbgs() << "Won't split critical edge after degenerate " + << printMBBReference(*this) << '\n'); return false; } return true; @@ -1189,6 +1284,16 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { return {}; } +/// Find the previous valid DebugLoc preceding MBBI, skipping and DBG_VALUE +/// instructions. Return UnknownLoc if there is none. +DebugLoc MachineBasicBlock::findPrevDebugLoc(instr_iterator MBBI) { + if (MBBI == instr_begin()) return {}; + // Skip debug declarations, we don't want a DebugLoc from them. + MBBI = skipDebugInstructionsBackward(std::prev(MBBI), instr_begin()); + if (!MBBI->isDebugInstr()) return MBBI->getDebugLoc(); + return {}; +} + /// Find and return the merged DebugLoc of the branch instructions of the block. /// Return UnknownLoc if there is none. DebugLoc diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 167135b56ec0..21350df624e7 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -198,10 +198,10 @@ namespace { class BlockChain; -/// \brief Type for our function-wide basic block -> block chain mapping. +/// Type for our function-wide basic block -> block chain mapping. using BlockToChainMapType = DenseMap<const MachineBasicBlock *, BlockChain *>; -/// \brief A chain of blocks which will be laid out contiguously. +/// A chain of blocks which will be laid out contiguously. 
/// /// This is the datastructure representing a chain of consecutive blocks that /// are profitable to layout together in order to maximize fallthrough @@ -213,13 +213,13 @@ using BlockToChainMapType = DenseMap<const MachineBasicBlock *, BlockChain *>; /// them. They participate in a block-to-chain mapping, which is updated /// automatically as chains are merged together. class BlockChain { - /// \brief The sequence of blocks belonging to this chain. + /// The sequence of blocks belonging to this chain. /// /// This is the sequence of blocks for a particular chain. These will be laid /// out in-order within the function. SmallVector<MachineBasicBlock *, 4> Blocks; - /// \brief A handle to the function-wide basic block to block chain mapping. + /// A handle to the function-wide basic block to block chain mapping. /// /// This is retained in each block chain to simplify the computation of child /// block chains for SCC-formation and iteration. We store the edges to child @@ -228,7 +228,7 @@ class BlockChain { BlockToChainMapType &BlockToChain; public: - /// \brief Construct a new BlockChain. + /// Construct a new BlockChain. /// /// This builds a new block chain representing a single basic block in the /// function. It also registers itself as the chain that block participates @@ -239,15 +239,15 @@ public: BlockToChain[BB] = this; } - /// \brief Iterator over blocks within the chain. + /// Iterator over blocks within the chain. using iterator = SmallVectorImpl<MachineBasicBlock *>::iterator; using const_iterator = SmallVectorImpl<MachineBasicBlock *>::const_iterator; - /// \brief Beginning of blocks within the chain. + /// Beginning of blocks within the chain. iterator begin() { return Blocks.begin(); } const_iterator begin() const { return Blocks.begin(); } - /// \brief End of blocks within the chain. + /// End of blocks within the chain. iterator end() { return Blocks.end(); } const_iterator end() const { return Blocks.end(); } @@ -261,7 +261,7 @@ public: return false; } - /// \brief Merge a block chain into this one. + /// Merge a block chain into this one. /// /// This routine merges a block chain into this one. It takes care of forming /// a contiguous sequence of basic blocks, updating the edge list, and @@ -293,14 +293,14 @@ public: } #ifndef NDEBUG - /// \brief Dump the blocks in this chain. + /// Dump the blocks in this chain. LLVM_DUMP_METHOD void dump() { for (MachineBasicBlock *MBB : *this) MBB->dump(); } #endif // NDEBUG - /// \brief Count of predecessors of any block within the chain which have not + /// Count of predecessors of any block within the chain which have not /// yet been scheduled. In general, we will delay scheduling this chain /// until those predecessors are scheduled (or we find a sufficiently good /// reason to override this heuristic.) Note that when forming loop chains, @@ -313,7 +313,7 @@ public: }; class MachineBlockPlacement : public MachineFunctionPass { - /// \brief A type for a block filter set. + /// A type for a block filter set. using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>; /// Pair struct containing basic block and taildup profitiability @@ -329,47 +329,47 @@ class MachineBlockPlacement : public MachineFunctionPass { MachineBasicBlock *Dest; }; - /// \brief work lists of blocks that are ready to be laid out + /// work lists of blocks that are ready to be laid out SmallVector<MachineBasicBlock *, 16> BlockWorkList; SmallVector<MachineBasicBlock *, 16> EHPadWorkList; /// Edges that have already been computed as optimal. 
DenseMap<const MachineBasicBlock *, BlockAndTailDupResult> ComputedEdges; - /// \brief Machine Function + /// Machine Function MachineFunction *F; - /// \brief A handle to the branch probability pass. + /// A handle to the branch probability pass. const MachineBranchProbabilityInfo *MBPI; - /// \brief A handle to the function-wide block frequency pass. + /// A handle to the function-wide block frequency pass. std::unique_ptr<BranchFolder::MBFIWrapper> MBFI; - /// \brief A handle to the loop info. + /// A handle to the loop info. MachineLoopInfo *MLI; - /// \brief Preferred loop exit. + /// Preferred loop exit. /// Member variable for convenience. It may be removed by duplication deep /// in the call stack. MachineBasicBlock *PreferredLoopExit; - /// \brief A handle to the target's instruction info. + /// A handle to the target's instruction info. const TargetInstrInfo *TII; - /// \brief A handle to the target's lowering info. + /// A handle to the target's lowering info. const TargetLoweringBase *TLI; - /// \brief A handle to the post dominator tree. + /// A handle to the post dominator tree. MachinePostDominatorTree *MPDT; - /// \brief Duplicator used to duplicate tails during placement. + /// Duplicator used to duplicate tails during placement. /// /// Placement decisions can open up new tail duplication opportunities, but /// since tail duplication affects placement decisions of later blocks, it /// must be done inline. TailDuplicator TailDup; - /// \brief Allocator and owner of BlockChain structures. + /// Allocator and owner of BlockChain structures. /// /// We build BlockChains lazily while processing the loop structure of /// a function. To reduce malloc traffic, we allocate them using this @@ -378,7 +378,7 @@ class MachineBlockPlacement : public MachineFunctionPass { /// the chains. SpecificBumpPtrAllocator<BlockChain> ChainAllocator; - /// \brief Function wide BasicBlock to BlockChain mapping. + /// Function wide BasicBlock to BlockChain mapping. /// /// This mapping allows efficiently moving from any given basic block to the /// BlockChain it participates in, if any. We use it to, among other things, @@ -425,7 +425,7 @@ class MachineBlockPlacement : public MachineFunctionPass { MachineBasicBlock *BB, MachineBasicBlock *LPred, BlockChain &Chain, BlockFilterSet *BlockFilter, MachineFunction::iterator &PrevUnplacedBlockIt, - bool &DuplicatedToPred); + bool &DuplicatedToLPred); bool hasBetterLayoutPredecessor( const MachineBasicBlock *BB, const MachineBasicBlock *Succ, const BlockChain &SuccChain, BranchProbability SuccProb, @@ -441,7 +441,7 @@ class MachineBlockPlacement : public MachineFunctionPass { MachineFunction::iterator &PrevUnplacedBlockIt, const BlockFilterSet *BlockFilter); - /// \brief Add a basic block to the work list if it is appropriate. + /// Add a basic block to the work list if it is appropriate. /// /// If the optional parameter BlockFilter is provided, only MBB /// present in the set will be added to the worklist. If nullptr @@ -474,7 +474,7 @@ class MachineBlockPlacement : public MachineFunctionPass { /// fallthroughs. bool isProfitableToTailDup( const MachineBasicBlock *BB, const MachineBasicBlock *Succ, - BranchProbability AdjustedSumProb, + BranchProbability QProb, const BlockChain &Chain, const BlockFilterSet *BlockFilter); /// Check for a trellis layout. @@ -545,7 +545,7 @@ INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) #ifndef NDEBUG -/// \brief Helper to print the name of a MBB. 
+/// Helper to print the name of a MBB. /// /// Only used by debug logging. static std::string getBlockName(const MachineBasicBlock *BB) { @@ -558,7 +558,7 @@ static std::string getBlockName(const MachineBasicBlock *BB) { } #endif -/// \brief Mark a chain's successors as having one fewer preds. +/// Mark a chain's successors as having one fewer preds. /// /// When a chain is being merged into the "placed" chain, this routine will /// quickly walk the successors of each block in the chain and mark them as @@ -574,7 +574,7 @@ void MachineBlockPlacement::markChainSuccessors( } } -/// \brief Mark a single block's successors as having one fewer preds. +/// Mark a single block's successors as having one fewer preds. /// /// Under normal circumstances, this is only called by markChainSuccessors, /// but if a block that was to be placed is completely tail-duplicated away, @@ -643,7 +643,8 @@ BranchProbability MachineBlockPlacement::collectViableSuccessors( if (SuccChain == &Chain) { SkipSucc = true; } else if (Succ != *SuccChain->begin()) { - DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n"); + LLVM_DEBUG(dbgs() << " " << getBlockName(Succ) + << " -> Mid chain!\n"); continue; } } @@ -1010,7 +1011,7 @@ MachineBlockPlacement::getBestTrellisSuccessor( // If we have a trellis, and BB doesn't have the best fallthrough edges, // we shouldn't choose any successor. We've already looked and there's a // better fallthrough edge for all the successors. - DEBUG(dbgs() << "Trellis, but not one of the chosen edges.\n"); + LLVM_DEBUG(dbgs() << "Trellis, but not one of the chosen edges.\n"); return Result; } @@ -1027,10 +1028,11 @@ MachineBlockPlacement::getBestTrellisSuccessor( canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) && isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1), Chain, BlockFilter)) { - DEBUG(BranchProbability Succ2Prob = getAdjustedProbability( - MBPI->getEdgeProbability(BB, Succ2), AdjustedSumProb); - dbgs() << " Selected: " << getBlockName(Succ2) - << ", probability: " << Succ2Prob << " (Tail Duplicate)\n"); + LLVM_DEBUG(BranchProbability Succ2Prob = getAdjustedProbability( + MBPI->getEdgeProbability(BB, Succ2), AdjustedSumProb); + dbgs() << " Selected: " << getBlockName(Succ2) + << ", probability: " << Succ2Prob + << " (Tail Duplicate)\n"); Result.BB = Succ2; Result.ShouldTailDup = true; return Result; @@ -1041,10 +1043,10 @@ MachineBlockPlacement::getBestTrellisSuccessor( ComputedEdges[BestB.Src] = { BestB.Dest, false }; auto TrellisSucc = BestA.Dest; - DEBUG(BranchProbability SuccProb = getAdjustedProbability( - MBPI->getEdgeProbability(BB, TrellisSucc), AdjustedSumProb); - dbgs() << " Selected: " << getBlockName(TrellisSucc) - << ", probability: " << SuccProb << " (Trellis)\n"); + LLVM_DEBUG(BranchProbability SuccProb = getAdjustedProbability( + MBPI->getEdgeProbability(BB, TrellisSucc), AdjustedSumProb); + dbgs() << " Selected: " << getBlockName(TrellisSucc) + << ", probability: " << SuccProb << " (Trellis)\n"); Result.BB = TrellisSucc; return Result; } @@ -1150,7 +1152,7 @@ void MachineBlockPlacement::precomputeTriangleChains() { if (TriangleChainCount == 0) return; - DEBUG(dbgs() << "Pre-computing triangle chains.\n"); + LLVM_DEBUG(dbgs() << "Pre-computing triangle chains.\n"); // Map from last block to the chain that contains it. This allows us to extend // chains as we find new triangles. 
DenseMap<const MachineBasicBlock *, TriangleChain> TriangleChainMap; @@ -1224,8 +1226,9 @@ void MachineBlockPlacement::precomputeTriangleChains() { MachineBasicBlock *dst = Chain.Edges.back(); Chain.Edges.pop_back(); for (MachineBasicBlock *src : reverse(Chain.Edges)) { - DEBUG(dbgs() << "Marking edge: " << getBlockName(src) << "->" << - getBlockName(dst) << " as pre-computed based on triangles.\n"); + LLVM_DEBUG(dbgs() << "Marking edge: " << getBlockName(src) << "->" + << getBlockName(dst) + << " as pre-computed based on triangles.\n"); auto InsertResult = ComputedEdges.insert({src, {dst, true}}); assert(InsertResult.second && "Block seen twice."); @@ -1431,15 +1434,15 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( } if (BadCFGConflict) { - DEBUG(dbgs() << " Not a candidate: " << getBlockName(Succ) << " -> " << SuccProb - << " (prob) (non-cold CFG conflict)\n"); + LLVM_DEBUG(dbgs() << " Not a candidate: " << getBlockName(Succ) << " -> " + << SuccProb << " (prob) (non-cold CFG conflict)\n"); return true; } return false; } -/// \brief Select the best successor for a block. +/// Select the best successor for a block. /// /// This looks across all successors of a particular block and attempts to /// select the "best" one to be the layout successor. It only considers direct @@ -1462,7 +1465,8 @@ MachineBlockPlacement::selectBestSuccessor( auto AdjustedSumProb = collectViableSuccessors(BB, Chain, BlockFilter, Successors); - DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) << "\n"); + LLVM_DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) + << "\n"); // if we already precomputed the best successor for BB, return that if still // applicable. @@ -1503,18 +1507,18 @@ MachineBlockPlacement::selectBestSuccessor( continue; } - DEBUG( - dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: " - << SuccProb + LLVM_DEBUG( + dbgs() << " Candidate: " << getBlockName(Succ) + << ", probability: " << SuccProb << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "") << "\n"); if (BestSucc.BB && BestProb >= SuccProb) { - DEBUG(dbgs() << " Not the best candidate, continuing\n"); + LLVM_DEBUG(dbgs() << " Not the best candidate, continuing\n"); continue; } - DEBUG(dbgs() << " Setting it as best candidate\n"); + LLVM_DEBUG(dbgs() << " Setting it as best candidate\n"); BestSucc.BB = Succ; BestProb = SuccProb; } @@ -1539,10 +1543,9 @@ MachineBlockPlacement::selectBestSuccessor( break; if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) && (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) { - DEBUG( - dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: " - << DupProb - << " (Tail Duplicate)\n"); + LLVM_DEBUG(dbgs() << " Candidate: " << getBlockName(Succ) + << ", probability: " << DupProb + << " (Tail Duplicate)\n"); BestSucc.BB = Succ; BestSucc.ShouldTailDup = true; break; @@ -1550,12 +1553,12 @@ MachineBlockPlacement::selectBestSuccessor( } if (BestSucc.BB) - DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc.BB) << "\n"); + LLVM_DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc.BB) << "\n"); return BestSucc; } -/// \brief Select the best block from a worklist. +/// Select the best block from a worklist. /// /// This looks through the provided worklist as a list of candidate basic /// blocks and select the most profitable one to place. 
The definition of @@ -1596,8 +1599,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( "Found CFG-violating block"); BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB); - DEBUG(dbgs() << " " << getBlockName(MBB) << " -> "; - MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); + LLVM_DEBUG(dbgs() << " " << getBlockName(MBB) << " -> "; + MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); // For ehpad, we layout the least probable first as to avoid jumping back // from least probable landingpads to more probable ones. @@ -1627,7 +1630,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( return BestBlock; } -/// \brief Retrieve the first unplaced basic block. +/// Retrieve the first unplaced basic block. /// /// This routine is called when we are unable to use the CFG to walk through /// all of the basic blocks and form a chain due to unnatural loops in the CFG. @@ -1723,8 +1726,8 @@ void MachineBlockPlacement::buildChain( if (!BestSucc) break; - DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the " - "layout successor until the CFG reduces\n"); + LLVM_DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the " + "layout successor until the CFG reduces\n"); } // Placement may have changed tail duplication opportunities. @@ -1743,18 +1746,18 @@ void MachineBlockPlacement::buildChain( // Zero out UnscheduledPredecessors for the successor we're about to merge in case // we selected a successor that didn't fit naturally into the CFG. SuccChain.UnscheduledPredecessors = 0; - DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to " - << getBlockName(BestSucc) << "\n"); + LLVM_DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to " + << getBlockName(BestSucc) << "\n"); markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter); Chain.merge(BestSucc, &SuccChain); BB = *std::prev(Chain.end()); } - DEBUG(dbgs() << "Finished forming chain for header block " - << getBlockName(*Chain.begin()) << "\n"); + LLVM_DEBUG(dbgs() << "Finished forming chain for header block " + << getBlockName(*Chain.begin()) << "\n"); } -/// \brief Find the best loop top block for layout. +/// Find the best loop top block for layout. /// /// Look for a block which is strictly better than the loop header for laying /// out at the top of the loop. This looks for one and only one pattern: @@ -1784,17 +1787,17 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, if (!LoopBlockSet.count(*HeaderChain.begin())) return L.getHeader(); - DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(L.getHeader()) - << "\n"); + LLVM_DEBUG(dbgs() << "Finding best loop top for: " + << getBlockName(L.getHeader()) << "\n"); BlockFrequency BestPredFreq; MachineBasicBlock *BestPred = nullptr; for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) { if (!LoopBlockSet.count(Pred)) continue; - DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", has " - << Pred->succ_size() << " successors, "; - MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); + LLVM_DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", has " + << Pred->succ_size() << " successors, "; + MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); if (Pred->succ_size() > 1) continue; @@ -1809,7 +1812,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, // If no direct predecessor is fine, just use the loop header. 
if (!BestPred) { - DEBUG(dbgs() << " final top unchanged\n"); + LLVM_DEBUG(dbgs() << " final top unchanged\n"); return L.getHeader(); } @@ -1819,11 +1822,11 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, *BestPred->pred_begin() != L.getHeader()) BestPred = *BestPred->pred_begin(); - DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n"); + LLVM_DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n"); return BestPred; } -/// \brief Find the best loop exiting block for layout. +/// Find the best loop exiting block for layout. /// /// This routine implements the logic to analyze the loop looking for the best /// block to layout at the top of the loop. Typically this is done to maximize @@ -1851,8 +1854,8 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, // blocks where rotating to exit with that block will reach an outer loop. SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop; - DEBUG(dbgs() << "Finding best loop exit for: " << getBlockName(L.getHeader()) - << "\n"); + LLVM_DEBUG(dbgs() << "Finding best loop exit for: " + << getBlockName(L.getHeader()) << "\n"); for (MachineBasicBlock *MBB : L.getBlocks()) { BlockChain &Chain = *BlockToChain[MBB]; // Ensure that this block is at the end of a chain; otherwise it could be @@ -1875,15 +1878,15 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, BlockChain &SuccChain = *BlockToChain[Succ]; // Don't split chains, either this chain or the successor's chain. if (&Chain == &SuccChain) { - DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " - << getBlockName(Succ) << " (chain conflict)\n"); + LLVM_DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " (chain conflict)\n"); continue; } auto SuccProb = MBPI->getEdgeProbability(MBB, Succ); if (LoopBlockSet.count(Succ)) { - DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> " - << getBlockName(Succ) << " (" << SuccProb << ")\n"); + LLVM_DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " (" << SuccProb << ")\n"); HasLoopingSucc = true; continue; } @@ -1896,9 +1899,10 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, } BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb; - DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " - << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] ("; - MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); + LLVM_DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " [L:" << SuccLoopDepth + << "] ("; + MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); // Note that we bias this toward an existing layout successor to retain // incoming order in the absence of better information. The exit must have // a frequency higher than the current exit before we consider breaking @@ -1922,11 +1926,12 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, // Without a candidate exiting block or with only a single block in the // loop, just use the loop header to layout the loop. 
if (!ExitingBB) { - DEBUG(dbgs() << " No other candidate exit blocks, using loop header\n"); + LLVM_DEBUG( + dbgs() << " No other candidate exit blocks, using loop header\n"); return nullptr; } if (L.getNumBlocks() == 1) { - DEBUG(dbgs() << " Loop has 1 block, using loop header as exit\n"); + LLVM_DEBUG(dbgs() << " Loop has 1 block, using loop header as exit\n"); return nullptr; } @@ -1937,11 +1942,12 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, !BlocksExitingToOuterLoop.count(ExitingBB)) return nullptr; - DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); + LLVM_DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) + << "\n"); return ExitingBB; } -/// \brief Attempt to rotate an exiting block to the bottom of the loop. +/// Attempt to rotate an exiting block to the bottom of the loop. /// /// Once we have built a chain, try to rotate it to line up the hot exit block /// with fallthrough out of the loop if doing so doesn't introduce unnecessary @@ -2014,12 +2020,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, return; } - DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB) - << " at bottom\n"); + LLVM_DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB) + << " at bottom\n"); std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()); } -/// \brief Attempt to rotate a loop based on profile data to reduce branch cost. +/// Attempt to rotate a loop based on profile data to reduce branch cost. /// /// With profile data, we can determine the cost in terms of missed fall through /// opportunities when rotating a loop chain and select the best rotation. @@ -2150,8 +2156,9 @@ void MachineBlockPlacement::rotateLoopWithProfile( } } - DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockName(*Iter) - << " to the top: " << Cost.getFrequency() << "\n"); + LLVM_DEBUG(dbgs() << "The cost of loop rotation by making " + << getBlockName(*Iter) + << " to the top: " << Cost.getFrequency() << "\n"); if (Cost < SmallestRotationCost) { SmallestRotationCost = Cost; @@ -2160,13 +2167,13 @@ void MachineBlockPlacement::rotateLoopWithProfile( } if (RotationPos != LoopChain.end()) { - DEBUG(dbgs() << "Rotate loop by making " << getBlockName(*RotationPos) - << " to the top\n"); + LLVM_DEBUG(dbgs() << "Rotate loop by making " << getBlockName(*RotationPos) + << " to the top\n"); std::rotate(LoopChain.begin(), RotationPos, LoopChain.end()); } } -/// \brief Collect blocks in the given loop that are to be placed. +/// Collect blocks in the given loop that are to be placed. /// /// When profile data is available, exclude cold blocks from the returned set; /// otherwise, collect all blocks in the loop. @@ -2202,7 +2209,7 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) { return LoopBlockSet; } -/// \brief Forms basic block chains from the natural loop structures. +/// Forms basic block chains from the natural loop structures. /// /// These chains are designed to preserve the existing *structure* of the code /// as much as possible. We can then stitch the chains together in a way which @@ -2265,7 +2272,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { else rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet); - DEBUG({ + LLVM_DEBUG({ // Crash at the end so we get all of the debugging output first. 
bool BadLoop = false; if (LoopChain.UnscheduledPredecessors) { @@ -2324,9 +2331,9 @@ void MachineBlockPlacement::buildCFGChains() { // Ensure that the layout successor is a viable block, as we know that // fallthrough is a possibility. assert(NextFI != FE && "Can't fallthrough past the last block."); - DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: " - << getBlockName(BB) << " -> " << getBlockName(NextBB) - << "\n"); + LLVM_DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: " + << getBlockName(BB) << " -> " << getBlockName(NextBB) + << "\n"); Chain->merge(NextBB, nullptr); #ifndef NDEBUG BlocksWithUnanalyzableExits.insert(&*BB); @@ -2356,7 +2363,7 @@ void MachineBlockPlacement::buildCFGChains() { #ifndef NDEBUG using FunctionBlockSetType = SmallPtrSet<MachineBasicBlock *, 16>; #endif - DEBUG({ + LLVM_DEBUG({ // Crash at the end so we get all of the debugging output first. bool BadFunc = false; FunctionBlockSetType FunctionBlockSet; @@ -2381,11 +2388,11 @@ void MachineBlockPlacement::buildCFGChains() { // Splice the blocks into place. MachineFunction::iterator InsertPos = F->begin(); - DEBUG(dbgs() << "[MBP] Function: "<< F->getName() << "\n"); + LLVM_DEBUG(dbgs() << "[MBP] Function: " << F->getName() << "\n"); for (MachineBasicBlock *ChainBB : FunctionChain) { - DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain " - : " ... ") - << getBlockName(ChainBB) << "\n"); + LLVM_DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain " + : " ... ") + << getBlockName(ChainBB) << "\n"); if (InsertPos != MachineFunction::iterator(ChainBB)) F->splice(InsertPos, ChainBB); else @@ -2470,11 +2477,11 @@ void MachineBlockPlacement::optimizeBranches() { MBPI->getEdgeProbability(ChainBB, FBB) > MBPI->getEdgeProbability(ChainBB, TBB) && !TII->reverseBranchCondition(Cond)) { - DEBUG(dbgs() << "Reverse order of the two branches: " - << getBlockName(ChainBB) << "\n"); - DEBUG(dbgs() << " Edge probability: " - << MBPI->getEdgeProbability(ChainBB, FBB) << " vs " - << MBPI->getEdgeProbability(ChainBB, TBB) << "\n"); + LLVM_DEBUG(dbgs() << "Reverse order of the two branches: " + << getBlockName(ChainBB) << "\n"); + LLVM_DEBUG(dbgs() << " Edge probability: " + << MBPI->getEdgeProbability(ChainBB, FBB) << " vs " + << MBPI->getEdgeProbability(ChainBB, TBB) << "\n"); DebugLoc dl; // FIXME: this is nowhere TII->removeBranch(*ChainBB); TII->insertBranch(*ChainBB, FBB, TBB, Cond, dl); @@ -2638,8 +2645,8 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( if (!shouldTailDuplicate(BB)) return false; - DEBUG(dbgs() << "Redoing tail duplication for Succ#" - << BB->getNumber() << "\n"); + LLVM_DEBUG(dbgs() << "Redoing tail duplication for Succ#" << BB->getNumber() + << "\n"); // This has to be a callback because none of it can be done after // BB is deleted. 
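Throughout these hunks the old DEBUG macro is renamed to LLVM_DEBUG with no intended change in behaviour. As an illustrative sketch only (not part of this patch), such a statement typically compiles away in NDEBUG builds and prints only when the pass's DEBUG_TYPE (assumed here to be "block-placement") is requested via -debug or -debug-only:

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "block-placement"

static void noteBlock(unsigned BBNumber) {
  // No-op unless assertions are enabled and this DEBUG_TYPE is requested
  // on the command line (-debug or -debug-only=block-placement).
  LLVM_DEBUG(llvm::dbgs() << "considering bb." << BBNumber << '\n');
}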
@@ -2687,8 +2694,8 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( if (RemBB == PreferredLoopExit) PreferredLoopExit = nullptr; - DEBUG(dbgs() << "TailDuplicator deleted block: " - << getBlockName(RemBB) << "\n"); + LLVM_DEBUG(dbgs() << "TailDuplicator deleted block: " + << getBlockName(RemBB) << "\n"); }; auto RemovalCallbackRef = function_ref<void(MachineBasicBlock*)>(RemovalCallback); @@ -2752,7 +2759,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { TailDupSize = TailDupPlacementAggressiveThreshold; TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); - // For agressive optimization, we can adjust some thresholds to be less + // For aggressive optimization, we can adjust some thresholds to be less // conservative. if (PassConfig->getOptLevel() >= CodeGenOpt::Aggressive) { // At O3 we should be more willing to copy blocks for tail duplication. This @@ -2834,17 +2841,17 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { namespace { -/// \brief A pass to compute block placement statistics. +/// A pass to compute block placement statistics. /// /// A separate pass to compute interesting statistics for evaluating block /// placement. This is separate from the actual placement pass so that they can /// be computed in the absence of any placement transformations or when using /// alternative placement strategies. class MachineBlockPlacementStats : public MachineFunctionPass { - /// \brief A handle to the branch probability pass. + /// A handle to the branch probability pass. const MachineBranchProbabilityInfo *MBPI; - /// \brief A handle to the function-wide block frequency pass. + /// A handle to the function-wide block frequency pass. const MachineBlockFrequencyInfo *MBFI; public: diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index 53c0d840ac84..6c92b1d426d6 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -176,11 +176,10 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, // class given a super-reg class and subreg index. if (DefMI->getOperand(1).getSubReg()) continue; - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - if (!MRI->constrainRegClass(SrcReg, RC)) + if (!MRI->constrainRegAttrs(SrcReg, Reg)) continue; - DEBUG(dbgs() << "Coalescing: " << *DefMI); - DEBUG(dbgs() << "*** to: " << *MI); + LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI); + LLVM_DEBUG(dbgs() << "*** to: " << *MI); // Propagate SrcReg of copies to MI. MO.setReg(SrcReg); MRI->clearKillFlags(SrcReg); @@ -315,7 +314,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, unsigned LookAheadLeft = LookAheadLimit; while (LookAheadLeft) { // Skip over dbg_value's. - while (I != E && I != EE && I->isDebugValue()) + while (I != E && I != EE && I->isDebugInstr()) ++I; if (I == EE) { @@ -354,7 +353,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, bool MachineCSE::isCSECandidate(MachineInstr *MI) { if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || - MI->isInlineAsm() || MI->isDebugValue()) + MI->isInlineAsm() || MI->isDebugInstr()) return false; // Ignore copies. @@ -446,25 +445,23 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // Heuristics #3: If the common subexpression is used by PHIs, do not reuse // it unless the defined value is already used in the BB of the new use. 
bool HasPHI = false; - SmallPtrSet<MachineBasicBlock*, 4> CSBBs; - for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { - HasPHI |= MI.isPHI(); - CSBBs.insert(MI.getParent()); + for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) { + HasPHI |= UseMI.isPHI(); + if (UseMI.getParent() == MI->getParent()) + return true; } - if (!HasPHI) - return true; - return CSBBs.count(MI->getParent()); + return !HasPHI; } void MachineCSE::EnterScope(MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); + LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); ScopeType *Scope = new ScopeType(VNT); ScopeMap[MBB] = Scope; } void MachineCSE::ExitScope(MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); + LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB); assert(SI != ScopeMap.end()); delete SI->second; @@ -548,13 +545,12 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Found a common subexpression, eliminate it. unsigned CSVN = VNT.lookup(MI); MachineInstr *CSMI = Exps[CSVN]; - DEBUG(dbgs() << "Examining: " << *MI); - DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); + LLVM_DEBUG(dbgs() << "Examining: " << *MI); + LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); // Check if it's profitable to perform this CSE. bool DoCSE = true; - unsigned NumDefs = MI->getDesc().getNumDefs() + - MI->getDesc().getNumImplicitDefs(); + unsigned NumDefs = MI->getNumDefs(); for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { MachineOperand &MO = MI->getOperand(i); @@ -583,16 +579,17 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { "Do not CSE physical register defs!"); if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { - DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); + LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); DoCSE = false; break; } - // Don't perform CSE if the result of the old instruction cannot exist - // within the register class of the new instruction. - const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg); - if (!MRI->constrainRegClass(NewReg, OldRC)) { - DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n"); + // Don't perform CSE if the result of the new instruction cannot exist + // within the constraints (register class, bank, or low-level type) of + // the old instruction. 
+ if (!MRI->constrainRegAttrs(NewReg, OldReg)) { + LLVM_DEBUG( + dbgs() << "*** Not the same register constraints, avoid CSE!\n"); DoCSE = false; break; } diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp index 702d21228477..0c6efff7bb40 100644 --- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp @@ -39,8 +39,27 @@ inc_threshold("machine-combiner-inc-threshold", cl::Hidden, cl::desc("Incremental depth computation will be used for basic " "blocks with more instructions."), cl::init(500)); +static cl::opt<bool> dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden, + cl::desc("Dump all substituted intrs"), + cl::init(false)); + +#ifdef EXPENSIVE_CHECKS +static cl::opt<bool> VerifyPatternOrder( + "machine-combiner-verify-pattern-order", cl::Hidden, + cl::desc( + "Verify that the generated patterns are ordered by increasing latency"), + cl::init(true)); +#else +static cl::opt<bool> VerifyPatternOrder( + "machine-combiner-verify-pattern-order", cl::Hidden, + cl::desc( + "Verify that the generated patterns are ordered by increasing latency"), + cl::init(false)); +#endif + namespace { class MachineCombiner : public MachineFunctionPass { + const TargetSubtargetInfo *STI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MCSchedModel SchedModel; @@ -85,6 +104,14 @@ private: SmallVectorImpl<MachineInstr *> &DelInstrs); void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs, SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC); + std::pair<unsigned, unsigned> + getLatenciesForInstrSequences(MachineInstr &MI, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + MachineTraceMetrics::Trace BlockTrace); + + void verifyPatternOrder(MachineBasicBlock *MBB, MachineInstr &Root, + SmallVector<MachineCombinerPattern, 16> &Patterns); }; } @@ -140,9 +167,6 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth for (auto *InstrPtr : InsInstrs) { // for each Use unsigned IDepth = 0; - DEBUG(dbgs() << "NEW INSTR "; - InstrPtr->print(dbgs(), TII); - dbgs() << "\n";); for (const MachineOperand &MO : InstrPtr->operands()) { // Check for virtual register operand. if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) @@ -242,6 +266,29 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) { } } +/// Estimate the latency of the new and original instruction sequence by summing +/// up the latencies of the inserted and deleted instructions. This assumes +/// that the inserted and deleted instructions are dependent instruction chains, +/// which might not hold in all cases. +std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences( + MachineInstr &MI, SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + MachineTraceMetrics::Trace BlockTrace) { + assert(!InsInstrs.empty() && "Only support sequences that insert instrs."); + unsigned NewRootLatency = 0; + // NewRoot is the last instruction in the \p InsInstrs vector. 
+ MachineInstr *NewRoot = InsInstrs.back(); + for (unsigned i = 0; i < InsInstrs.size() - 1; i++) + NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]); + NewRootLatency += getLatency(&MI, NewRoot, BlockTrace); + + unsigned RootLatency = 0; + for (auto I : DelInstrs) + RootLatency += TSchedModel.computeInstrLatency(I); + + return {NewRootLatency, RootLatency}; +} + /// The DAGCombine code sequence ends in MI (Machine Instruction) Root. /// The new code sequence ends in MI NewRoot. A necessary condition for the new /// sequence to replace the old sequence is that it cannot lengthen the critical @@ -257,56 +304,50 @@ bool MachineCombiner::improvesCriticalPathLen( bool SlackIsAccurate) { assert(TSchedModel.hasInstrSchedModelOrItineraries() && "Missing machine model\n"); - // NewRoot is the last instruction in the \p InsInstrs vector. - unsigned NewRootIdx = InsInstrs.size() - 1; - MachineInstr *NewRoot = InsInstrs[NewRootIdx]; - // Get depth and latency of NewRoot and Root. unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth; - DEBUG(dbgs() << "DEPENDENCE DATA FOR " << *Root << "\n"; - dbgs() << " NewRootDepth: " << NewRootDepth << "\n"; - dbgs() << " RootDepth: " << RootDepth << "\n"); + LLVM_DEBUG(dbgs() << " Dependence data for " << *Root << "\tNewRootDepth: " + << NewRootDepth << "\tRootDepth: " << RootDepth); // For a transform such as reassociation, the cost equation is // conservatively calculated so that we must improve the depth (data // dependency cycles) in the critical path to proceed with the transform. // Being conservative also protects against inaccuracies in the underlying // machine trace metrics and CPU models. - if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) + if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) { + LLVM_DEBUG(dbgs() << "\tIt MustReduceDepth "); + LLVM_DEBUG(NewRootDepth < RootDepth + ? dbgs() << "\t and it does it\n" + : dbgs() << "\t but it does NOT do it\n"); return NewRootDepth < RootDepth; + } // A more flexible cost calculation for the critical path includes the slack // of the original code sequence. This may allow the transform to proceed // even if the instruction depths (data dependency cycles) become worse. // Account for the latency of the inserted and deleted instructions by - // adding up their latencies. This assumes that the inserted and deleted - // instructions are dependent instruction chains, which might not hold - // in all cases. - unsigned NewRootLatency = 0; - for (unsigned i = 0; i < InsInstrs.size() - 1; i++) - NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]); - NewRootLatency += getLatency(Root, NewRoot, BlockTrace); - - unsigned RootLatency = 0; - for (auto I : DelInstrs) - RootLatency += TSchedModel.computeInstrLatency(I); + unsigned NewRootLatency, RootLatency; + std::tie(NewRootLatency, RootLatency) = + getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace); unsigned RootSlack = BlockTrace.getInstrSlack(*Root); unsigned NewCycleCount = NewRootDepth + NewRootLatency; - unsigned OldCycleCount = RootDepth + RootLatency + - (SlackIsAccurate ? 
RootSlack : 0); - DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n"; - dbgs() << " RootLatency: " << RootLatency << "\n"; - dbgs() << " RootSlack: " << RootSlack << " SlackIsAccurate=" - << SlackIsAccurate << "\n"; - dbgs() << " NewRootDepth + NewRootLatency = " - << NewCycleCount << "\n"; - dbgs() << " RootDepth + RootLatency + RootSlack = " - << OldCycleCount << "\n"; - ); + unsigned OldCycleCount = + RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0); + LLVM_DEBUG(dbgs() << "\n\tNewRootLatency: " << NewRootLatency + << "\tRootLatency: " << RootLatency << "\n\tRootSlack: " + << RootSlack << " SlackIsAccurate=" << SlackIsAccurate + << "\n\tNewRootDepth + NewRootLatency = " << NewCycleCount + << "\n\tRootDepth + RootLatency + RootSlack = " + << OldCycleCount;); + LLVM_DEBUG(NewCycleCount <= OldCycleCount + ? dbgs() << "\n\t It IMPROVES PathLen because" + : dbgs() << "\n\t It DOES NOT improve PathLen because"); + LLVM_DEBUG(dbgs() << "\n\t\tNewCycleCount = " << NewCycleCount + << ", OldCycleCount = " << OldCycleCount << "\n"); return NewCycleCount <= OldCycleCount; } @@ -352,9 +393,14 @@ bool MachineCombiner::preservesResourceLen( unsigned ResLenAfterCombine = BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr); - DEBUG(dbgs() << "RESOURCE DATA: \n"; - dbgs() << " resource len before: " << ResLenBeforeCombine - << " after: " << ResLenAfterCombine << "\n";); + LLVM_DEBUG(dbgs() << "\t\tResource length before replacement: " + << ResLenBeforeCombine + << " and after: " << ResLenAfterCombine << "\n";); + LLVM_DEBUG( + ResLenAfterCombine <= ResLenBeforeCombine + ? dbgs() << "\t\t As result it IMPROVES/PRESERVES Resource Length\n" + : dbgs() << "\t\t As result it DOES NOT improve/preserve Resource " + "Length\n"); return ResLenAfterCombine <= ResLenBeforeCombine; } @@ -409,6 +455,35 @@ static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, NumInstCombined++; } +// Check that the difference between original and new latency is decreasing for +// later patterns. This helps to discover sub-optimal pattern orderings. +void MachineCombiner::verifyPatternOrder( + MachineBasicBlock *MBB, MachineInstr &Root, + SmallVector<MachineCombinerPattern, 16> &Patterns) { + long PrevLatencyDiff = std::numeric_limits<long>::max(); + (void)PrevLatencyDiff; // Variable is used in assert only. + for (auto P : Patterns) { + SmallVector<MachineInstr *, 16> InsInstrs; + SmallVector<MachineInstr *, 16> DelInstrs; + DenseMap<unsigned, unsigned> InstrIdxForVirtReg; + TII->genAlternativeCodeSequence(Root, P, InsInstrs, DelInstrs, + InstrIdxForVirtReg); + // Found pattern, but did not generate alternative sequence. + // This can happen e.g. when an immediate could not be materialized + // in a single instruction. + if (InsInstrs.empty() || !TSchedModel.hasInstrSchedModelOrItineraries()) + continue; + + unsigned NewRootLatency, RootLatency; + std::tie(NewRootLatency, RootLatency) = getLatenciesForInstrSequences( + Root, InsInstrs, DelInstrs, MinInstr->getTrace(MBB)); + long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency); + assert(CurrentLatencyDiff <= PrevLatencyDiff && + "Current pattern is better than previous pattern."); + PrevLatencyDiff = CurrentLatencyDiff; + } +} + /// Substitute a slow code sequence with a faster one by /// evaluating instruction combining pattern. /// The prototype of such a pattern is MUl + ADD -> MADD. 
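The cost check above can be read as plain arithmetic: the combiner keeps a transformation only when NewRootDepth + NewRootLatency does not exceed RootDepth + RootLatency (plus the root's slack when that slack is trustworthy), and verifyPatternOrder additionally asserts that the latency saving of successive patterns never grows. A minimal standalone sketch of both rules, with made-up numbers standing in for the scheduler's estimates (this models the logic only; it is not the LLVM API):

  #include <cassert>
  #include <vector>

  // Toy model of the combiner's cost check: the replacement sequence is kept
  // only if its end cycle does not exceed the original end cycle (optionally
  // including slack on the old critical path).
  struct SeqCost { unsigned Depth; unsigned Latency; };

  static bool improvesPathLen(SeqCost NewRoot, SeqCost OldRoot, unsigned Slack,
                              bool SlackIsAccurate) {
    unsigned NewCycle = NewRoot.Depth + NewRoot.Latency;
    unsigned OldCycle = OldRoot.Depth + OldRoot.Latency +
                        (SlackIsAccurate ? Slack : 0);
    return NewCycle <= OldCycle;
  }

  int main() {
    // MUL + ADD (latency 3 + 1) replaced by a single fused multiply-add
    // (latency 3) at the same depth: the new end cycle is smaller.
    SeqCost Old{/*Depth=*/2, /*Latency=*/3 + 1};
    SeqCost New{/*Depth=*/2, /*Latency=*/3};
    assert(improvesPathLen(New, Old, /*Slack=*/0, /*SlackIsAccurate=*/true));

    // verifyPatternOrder-style invariant: the saving (old minus new latency)
    // must not increase for later patterns, i.e. the best pattern comes first.
    std::vector<long> Savings = {4, 2, 2, 1};
    for (size_t i = 1; i < Savings.size(); ++i)
      assert(Savings[i] <= Savings[i - 1]);
    return 0;
  }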
Performs instruction @@ -418,7 +493,7 @@ static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, /// sequence is shorter. bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { bool Changed = false; - DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n"); + LLVM_DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n"); bool IncrementalUpdate = false; auto BlockIter = MBB->begin(); @@ -433,8 +508,6 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { while (BlockIter != MBB->end()) { auto &MI = *BlockIter++; - - DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";); SmallVector<MachineCombinerPattern, 16> Patterns; // The motivating example is: // @@ -459,11 +532,16 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { // The algorithm does not try to evaluate all patterns and pick the best. // This is only an artificial restriction though. In practice there is // mostly one pattern, and getMachineCombinerPatterns() can order patterns - // based on an internal cost heuristic. + // based on an internal cost heuristic. If + // machine-combiner-verify-pattern-order is enabled, all patterns are + // checked to ensure later patterns do not provide better latency savings. if (!TII->getMachineCombinerPatterns(MI, Patterns)) continue; + if (VerifyPatternOrder) + verifyPatternOrder(MBB, MI, Patterns); + for (auto P : Patterns) { SmallVector<MachineInstr *, 16> InsInstrs; SmallVector<MachineInstr *, 16> DelInstrs; @@ -478,6 +556,19 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { if (!NewInstCount) continue; + LLVM_DEBUG(if (dump_intrs) { + dbgs() << "\tFor the Pattern (" << (int)P << ") these instructions could be removed\n"; + for (auto const *InstrPtr : DelInstrs) { + dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": "; + InstrPtr->print(dbgs(), false, false, false, TII); + } + dbgs() << "\tThese instructions could replace the removed ones\n"; + for (auto const *InstrPtr : InsInstrs) { + dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": "; + InstrPtr->print(dbgs(), false, false, false, TII); + } + }); + bool SubstituteAlways = false; if (ML && TII->isThroughputPattern(P)) SubstituteAlways = true; @@ -539,20 +630,22 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { } bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { - const TargetSubtargetInfo &STI = MF.getSubtarget(); - TII = STI.getInstrInfo(); - TRI = STI.getRegisterInfo(); - SchedModel = STI.getSchedModel(); - TSchedModel.init(SchedModel, &STI, TII); + STI = &MF.getSubtarget(); + TII = STI->getInstrInfo(); + TRI = STI->getRegisterInfo(); + SchedModel = STI->getSchedModel(); + TSchedModel.init(STI); MRI = &MF.getRegInfo(); MLI = &getAnalysis<MachineLoopInfo>(); Traces = &getAnalysis<MachineTraceMetrics>(); MinInstr = nullptr; OptSize = MF.getFunction().optForSize(); - DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); + LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); if (!TII->useMachineCombiner()) { - DEBUG(dbgs() << " Skipping pass: Target does not support machine combiner\n"); + LLVM_DEBUG( + dbgs() + << " Skipping pass: Target does not support machine combiner\n"); return false; } diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp index fcec05adc732..3bf8147a06c3 100644 --- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ 
-9,6 +9,35 @@ // // This is an extremely simple MachineInstr-level copy propagation pass. // +// This pass forwards the source of COPYs to the users of their destinations +// when doing so is legal. For example: +// +// %reg1 = COPY %reg0 +// ... +// ... = OP %reg1 +// +// If +// - %reg0 has not been clobbered by the time of the use of %reg1 +// - the register class constraints are satisfied +// - the COPY def is the only value that reaches OP +// then this pass replaces the above with: +// +// %reg1 = COPY %reg0 +// ... +// ... = OP %reg0 +// +// This pass also removes some redundant COPYs. For example: +// +// %R1 = COPY %R0 +// ... // No clobber of %R1 +// %R0 = COPY %R1 <<< Removed +// +// or +// +// %R1 = COPY %R0 +// ... // No clobber of %R0 +// %R1 = COPY %R0 <<< Removed +// //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" @@ -23,11 +52,13 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/DebugCounter.h" #include "llvm/Support/raw_ostream.h" #include <cassert> #include <iterator> @@ -37,6 +68,9 @@ using namespace llvm; #define DEBUG_TYPE "machine-cp" STATISTIC(NumDeletes, "Number of dead copies deleted"); +STATISTIC(NumCopyForwards, "Number of copy uses forwarded"); +DEBUG_COUNTER(FwdCounter, "machine-cp-fwd", + "Controls which register COPYs are forwarded"); namespace { @@ -73,6 +107,10 @@ using Reg2MIMap = DenseMap<unsigned, MachineInstr *>; void ReadRegister(unsigned Reg); void CopyPropagateBlock(MachineBasicBlock &MBB); bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); + void forwardUses(MachineInstr &MI); + bool isForwardableRegClassCopy(const MachineInstr &Copy, + const MachineInstr &UseI, unsigned UseIdx); + bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use); /// Candidates for deletion. SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; @@ -143,7 +181,8 @@ void MachineCopyPropagation::ReadRegister(unsigned Reg) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { Reg2MIMap::iterator CI = CopyMap.find(*AI); if (CI != CopyMap.end()) { - DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump()); + LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; + CI->second->dump()); MaybeDeadCopies.remove(CI->second); } } @@ -191,7 +230,7 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src, if (!isNopCopy(PrevCopy, Src, Def, TRI)) return false; - DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump()); + LLVM_DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump()); // Copy was redundantly redefining either Src or Def. Remove earlier kill // flags between Copy and PrevCopy because the value will be reused now. @@ -208,14 +247,163 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src, return true; } +/// Decide whether we should forward the source of \param Copy to its use in +/// \param UseI based on the physical register class constraints of the opcode +/// and avoiding introducing more cross-class COPYs. 
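The forwarding rule described in the new pass header boils down to: while walking a block, remember each "dst = COPY src" that is still live, rewrite later renamable reads of dst to read src, and drop the remembered copy as soon as either register is redefined. A self-contained toy model of that bookkeeping follows; the Inst structure and register names are invented for illustration, and the real pass works on MachineInstr/physical registers and also handles aliases, implicit operands and reserved registers:

  #include <cassert>
  #include <map>
  #include <string>
  #include <vector>

  // Toy model of copy forwarding: "dst = COPY src" lets later readers of dst
  // use src directly, provided src has not been clobbered in the meantime and
  // the use is renamable (no ABI/opcode constraint ties it to dst).
  struct Inst { std::string Op, Def, Use; bool Renamable; };

  static void forwardCopies(std::vector<Inst> &Block) {
    std::map<std::string, std::string> AvailCopy; // dst -> src
    for (Inst &I : Block) {
      if (!I.Use.empty() && I.Renamable) {
        auto It = AvailCopy.find(I.Use);
        if (It != AvailCopy.end())
          I.Use = It->second;                     // ... = OP src
      }
      // A def invalidates copies of that register and copies reading from it.
      for (auto It = AvailCopy.begin(); It != AvailCopy.end();)
        It = (It->first == I.Def || It->second == I.Def) ? AvailCopy.erase(It)
                                                         : ++It;
      if (I.Op == "COPY")
        AvailCopy[I.Def] = I.Use;
    }
  }

  int main() {
    std::vector<Inst> B = {{"COPY", "r1", "r0", true}, {"OP", "r2", "r1", true}};
    forwardCopies(B);
    assert(B[1].Use == "r0"); // the OP now reads r0 directly
    return 0;
  }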
+bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, + const MachineInstr &UseI, + unsigned UseIdx) { + + unsigned CopySrcReg = Copy.getOperand(1).getReg(); + + // If the new register meets the opcode register constraints, then allow + // forwarding. + if (const TargetRegisterClass *URC = + UseI.getRegClassConstraint(UseIdx, TII, TRI)) + return URC->contains(CopySrcReg); + + if (!UseI.isCopy()) + return false; + + /// COPYs don't have register class constraints, so if the user instruction + /// is a COPY, we just try to avoid introducing additional cross-class + /// COPYs. For example: + /// + /// RegClassA = COPY RegClassB // Copy parameter + /// ... + /// RegClassB = COPY RegClassA // UseI parameter + /// + /// which after forwarding becomes + /// + /// RegClassA = COPY RegClassB + /// ... + /// RegClassB = COPY RegClassB + /// + /// so we have reduced the number of cross-class COPYs and potentially + /// introduced a nop COPY that can be removed. + const TargetRegisterClass *UseDstRC = + TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg()); + + const TargetRegisterClass *SuperRC = UseDstRC; + for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses(); + SuperRC; SuperRC = *SuperRCI++) + if (SuperRC->contains(CopySrcReg)) + return true; + + return false; +} + +/// Check that \p MI does not have implicit uses that overlap with it's \p Use +/// operand (the register being replaced), since these can sometimes be +/// implicitly tied to other operands. For example, on AMDGPU: +/// +/// V_MOVRELS_B32_e32 %VGPR2, %M0<imp-use>, %EXEC<imp-use>, %VGPR2_VGPR3_VGPR4_VGPR5<imp-use> +/// +/// the %VGPR2 is implicitly tied to the larger reg operand, but we have no +/// way of knowing we need to update the latter when updating the former. +bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI, + const MachineOperand &Use) { + for (const MachineOperand &MIUse : MI.uses()) + if (&MIUse != &Use && MIUse.isReg() && MIUse.isImplicit() && + MIUse.isUse() && TRI->regsOverlap(Use.getReg(), MIUse.getReg())) + return true; + + return false; +} + +/// Look for available copies whose destination register is used by \p MI and +/// replace the use in \p MI with the copy's source register. +void MachineCopyPropagation::forwardUses(MachineInstr &MI) { + if (AvailCopyMap.empty()) + return; + + // Look for non-tied explicit vreg uses that have an active COPY + // instruction that defines the physical register allocated to them. + // Replace the vreg with the source of the active COPY. + for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx < OpEnd; + ++OpIdx) { + MachineOperand &MOUse = MI.getOperand(OpIdx); + // Don't forward into undef use operands since doing so can cause problems + // with the machine verifier, since it doesn't treat undef reads as reads, + // so we can end up with a live range that ends on an undef read, leading to + // an error that the live range doesn't end on a read of the live range + // register. + if (!MOUse.isReg() || MOUse.isTied() || MOUse.isUndef() || MOUse.isDef() || + MOUse.isImplicit()) + continue; + + if (!MOUse.getReg()) + continue; + + // Check that the register is marked 'renamable' so we know it is safe to + // rename it without violating any constraints that aren't expressed in the + // IR (e.g. ABI or opcode requirements). 
+ if (!MOUse.isRenamable()) + continue; + + auto CI = AvailCopyMap.find(MOUse.getReg()); + if (CI == AvailCopyMap.end()) + continue; + + MachineInstr &Copy = *CI->second; + unsigned CopyDstReg = Copy.getOperand(0).getReg(); + const MachineOperand &CopySrc = Copy.getOperand(1); + unsigned CopySrcReg = CopySrc.getReg(); + + // FIXME: Don't handle partial uses of wider COPYs yet. + if (MOUse.getReg() != CopyDstReg) { + LLVM_DEBUG( + dbgs() << "MCP: FIXME! Not forwarding COPY to sub-register use:\n " + << MI); + continue; + } + + // Don't forward COPYs of reserved regs unless they are constant. + if (MRI->isReserved(CopySrcReg) && !MRI->isConstantPhysReg(CopySrcReg)) + continue; + + if (!isForwardableRegClassCopy(Copy, MI, OpIdx)) + continue; + + if (hasImplicitOverlap(MI, MOUse)) + continue; + + if (!DebugCounter::shouldExecute(FwdCounter)) { + LLVM_DEBUG(dbgs() << "MCP: Skipping forwarding due to debug counter:\n " + << MI); + continue; + } + + LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MOUse.getReg(), TRI) + << "\n with " << printReg(CopySrcReg, TRI) + << "\n in " << MI << " from " << Copy); + + MOUse.setReg(CopySrcReg); + if (!CopySrc.isRenamable()) + MOUse.setIsRenamable(false); + + LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n"); + + // Clear kill markers that may have been invalidated. + for (MachineInstr &KMI : + make_range(Copy.getIterator(), std::next(MI.getIterator()))) + KMI.clearRegisterKills(CopySrcReg, TRI); + + ++NumCopyForwards; + Changed = true; + } +} + void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { - DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n"); + LLVM_DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n"); for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { MachineInstr *MI = &*I; ++I; - if (MI->isCopy()) { + // Analyze copies (which don't overlap themselves). + if (MI->isCopy() && !TRI->regsOverlap(MI->getOperand(0).getReg(), + MI->getOperand(1).getReg())) { unsigned Def = MI->getOperand(0).getReg(); unsigned Src = MI->getOperand(1).getReg(); @@ -241,6 +429,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def)) continue; + forwardUses(*MI); + + // Src may have been changed by forwardUses() + Src = MI->getOperand(1).getReg(); + // If Src is defined by a previous copy, the previous copy cannot be // eliminated. ReadRegister(Src); @@ -253,7 +446,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { ReadRegister(Reg); } - DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump()); + LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump()); // Copy is now a candidate for deletion. if (!MRI->isReserved(Def)) @@ -292,6 +485,20 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { continue; } + // Clobber any earlyclobber regs first. + for (const MachineOperand &MO : MI->operands()) + if (MO.isReg() && MO.isEarlyClobber()) { + unsigned Reg = MO.getReg(); + // If we have a tied earlyclobber, that means it is also read by this + // instruction, so we need to make sure we don't remove it as dead + // later. + if (MO.isTied()) + ReadRegister(Reg); + ClobberRegister(Reg); + } + + forwardUses(*MI); + // Not a copy. 
SmallVector<unsigned, 2> Defs; const MachineOperand *RegMask = nullptr; @@ -307,10 +514,10 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { assert(!TargetRegisterInfo::isVirtualRegister(Reg) && "MachineCopyPropagation should be run after register allocation!"); - if (MO.isDef()) { + if (MO.isDef() && !MO.isEarlyClobber()) { Defs.push_back(Reg); continue; - } else if (MO.readsReg()) + } else if (!MO.isDebug() && MO.readsReg()) ReadRegister(Reg); } @@ -331,8 +538,8 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { continue; } - DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: "; - MaybeDead->dump()); + LLVM_DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: "; + MaybeDead->dump()); // erase() will return the next valid iterator pointing to the next // element after the erased one. @@ -364,6 +571,8 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // since we don't want to trust live-in lists. if (MBB.succ_empty()) { for (MachineInstr *MaybeDead : MaybeDeadCopies) { + LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: "; + MaybeDead->dump()); assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg())); MaybeDead->eraseFromParent(); Changed = true; diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp index 517ac29b6450..6b2802626456 100644 --- a/contrib/llvm/lib/CodeGen/MachineDominators.cpp +++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp @@ -65,8 +65,21 @@ void MachineDominatorTree::releaseMemory() { } void MachineDominatorTree::verifyAnalysis() const { - if (DT && VerifyMachineDomInfo) - verifyDomTree(); + if (DT && VerifyMachineDomInfo) { + MachineFunction &F = *getRoot()->getParent(); + + DomTreeBase<MachineBasicBlock> OtherDT; + OtherDT.recalculate(F); + if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() || + DT->compare(OtherDT)) { + errs() << "MachineDominatorTree for function " << F.getName() + << " is not up to date!\nComputed:\n"; + DT->print(errs()); + errs() << "\nActual:\n"; + OtherDT.print(errs()); + abort(); + } + } } void MachineDominatorTree::print(raw_ostream &OS, const Module*) const { @@ -138,21 +151,3 @@ void MachineDominatorTree::applySplitCriticalEdges() const { NewBBs.clear(); CriticalEdgesToSplit.clear(); } - -void MachineDominatorTree::verifyDomTree() const { - if (!DT) - return; - MachineFunction &F = *getRoot()->getParent(); - - DomTreeBase<MachineBasicBlock> OtherDT; - OtherDT.recalculate(F); - if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() || - DT->compare(OtherDT)) { - errs() << "MachineDominatorTree for function " << F.getName() - << " is not up to date!\nComputed:\n"; - DT->print(errs()); - errs() << "\nActual:\n"; - OtherDT.print(errs()); - abort(); - } -} diff --git a/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp b/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp index 2aa9d6b816c8..0b316871dbdf 100644 --- a/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <cassert> @@ -40,9 +41,9 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, unsigned StackAlign) { if (!ShouldClamp || Align <= StackAlign) return Align; 
- DEBUG(dbgs() << "Warning: requested alignment " << Align - << " exceeds the stack alignment " << StackAlign - << " when stack realignment is off" << '\n'); + LLVM_DEBUG(dbgs() << "Warning: requested alignment " << Align + << " exceeds the stack alignment " << StackAlign + << " when stack realignment is off" << '\n'); return StackAlign; } @@ -217,7 +218,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ OS << " fi#" << (int)(i-NumFixedObjects) << ": "; if (SO.StackID != 0) - OS << "id=" << SO.StackID << ' '; + OS << "id=" << static_cast<unsigned>(SO.StackID) << ' '; if (SO.Size == ~0ULL) { OS << "dead\n"; diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp index bc8eb1429d92..dd668bcf6193 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -37,7 +37,9 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/WasmEHFuncInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -174,6 +176,11 @@ void MachineFunction::init() { WinEHInfo = new (Allocator) WinEHFuncInfo(); } + if (isScopedEHPersonality(classifyEHPersonality( + F.hasPersonalityFn() ? F.getPersonalityFn() : nullptr))) { + WasmEHInfo = new (Allocator) WasmEHFuncInfo(); + } + assert(Target.isCompatibleDataLayout(getDataLayout()) && "Can't create a MachineFunction using a Module with a " "Target-incompatible DataLayout attached\n"); @@ -195,6 +202,7 @@ void MachineFunction::clear() { // Do call MachineBasicBlock destructors, it contains std::vectors. for (iterator I = begin(), E = end(); I != E; I = BasicBlocks.erase(I)) I->Insts.clearAndLeakNodesUnsafely(); + MBBNumbering.clear(); InstructionRecycler.clear(Allocator); OperandRecycler.clear(Allocator); @@ -478,6 +486,14 @@ const char *MachineFunction::createExternalSymbolName(StringRef Name) { return Dest; } +uint32_t *MachineFunction::allocateRegMask() { + unsigned NumRegs = getSubtarget().getRegisterInfo()->getNumRegs(); + unsigned Size = MachineOperand::getRegMaskSize(NumRegs); + uint32_t *Mask = Allocator.Allocate<uint32_t>(Size); + memset(Mask, 0, Size * sizeof(Mask[0])); + return Mask; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void MachineFunction::dump() const { print(dbgs()); @@ -522,7 +538,8 @@ void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const { MST.incorporateFunction(getFunction()); for (const auto &BB : *this) { OS << '\n'; - BB.print(OS, MST, Indexes); + // If we print the whole function, print it at its most verbose level. 
+ BB.print(OS, MST, Indexes, /*IsStandalone=*/true); } OS << "\n# End machine code for function " << getName() << ".\n\n"; diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp index 5ffe33006131..67ac95740e3e 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -24,7 +24,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -85,7 +84,6 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MemoryDependenceWrapperPass>(); AU.addPreserved<ScalarEvolutionWrapperPass>(); AU.addPreserved<SCEVAAWrapperPass>(); - AU.addPreserved<StackProtector>(); FunctionPass::getAnalysisUsage(AU); } diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index 14655c6eb700..96fcfdb72ad7 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -37,6 +37,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" @@ -74,6 +75,29 @@ using namespace llvm; +static const MachineFunction *getMFIfAvailable(const MachineInstr &MI) { + if (const MachineBasicBlock *MBB = MI.getParent()) + if (const MachineFunction *MF = MBB->getParent()) + return MF; + return nullptr; +} + +// Try to crawl up to the machine function and get TRI and IntrinsicInfo from +// it. +static void tryToGetTargetInfo(const MachineInstr &MI, + const TargetRegisterInfo *&TRI, + const MachineRegisterInfo *&MRI, + const TargetIntrinsicInfo *&IntrinsicInfo, + const TargetInstrInfo *&TII) { + + if (const MachineFunction *MF = getMFIfAvailable(MI)) { + TRI = MF->getSubtarget().getRegisterInfo(); + MRI = &MF->getRegInfo(); + IntrinsicInfo = MF->getTarget().getIntrinsicInfo(); + TII = MF->getSubtarget().getInstrInfo(); + } +} + void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { if (MCID->ImplicitDefs) for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; @@ -358,6 +382,12 @@ MachineInstr::mergeMemRefsWith(const MachineInstr& Other) { return std::make_pair(MemBegin, CombinedNumMemRefs); } +uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const { + // For now, the just return the union of the flags. If the flags get more + // complicated over time, we might need more logic here. + return getFlags() | Other.getFlags(); +} + bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const { assert(!isBundledWithPred() && "Must be called on bundle header"); for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) { @@ -437,8 +467,8 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other, return false; } } - // If DebugLoc does not match then two dbg.values are not identical. - if (isDebugValue()) + // If DebugLoc does not match then two debug instructions are not identical. 
+ if (isDebugInstr()) if (getDebugLoc() && Other.getDebugLoc() && getDebugLoc() != Other.getDebugLoc()) return false; @@ -489,21 +519,39 @@ void MachineInstr::eraseFromBundle() { getParent()->erase_instr(this); } -/// getNumExplicitOperands - Returns the number of non-implicit operands. -/// unsigned MachineInstr::getNumExplicitOperands() const { unsigned NumOperands = MCID->getNumOperands(); if (!MCID->isVariadic()) return NumOperands; - for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) { - const MachineOperand &MO = getOperand(i); - if (!MO.isReg() || !MO.isImplicit()) - NumOperands++; + for (unsigned I = NumOperands, E = getNumOperands(); I != E; ++I) { + const MachineOperand &MO = getOperand(I); + // The operands must always be in the following order: + // - explicit reg defs, + // - other explicit operands (reg uses, immediates, etc.), + // - implicit reg defs + // - implicit reg uses + if (MO.isReg() && MO.isImplicit()) + break; + ++NumOperands; } return NumOperands; } +unsigned MachineInstr::getNumExplicitDefs() const { + unsigned NumDefs = MCID->getNumDefs(); + if (!MCID->isVariadic()) + return NumDefs; + + for (unsigned I = NumDefs, E = getNumOperands(); I != E; ++I) { + const MachineOperand &MO = getOperand(I); + if (!MO.isReg() || !MO.isDef() || MO.isImplicit()) + break; + ++NumDefs; + } + return NumDefs; +} + void MachineInstr::bundleWithPred() { assert(!isBundledWithPred() && "MI is already bundled with its predecessor"); setFlag(BundledPred); @@ -583,6 +631,11 @@ int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx, return -1; } +const DILabel *MachineInstr::getDebugLabel() const { + assert(isDebugLabel() && "not a DBG_LABEL"); + return cast<DILabel>(getOperand(0).getMetadata()); +} + const DILocalVariable *MachineInstr::getDebugVariable() const { assert(isDebugValue() && "not a DBG_VALUE"); return cast<DILocalVariable>(getOperand(2).getMetadata()); @@ -905,8 +958,7 @@ void MachineInstr::clearKillInfo() { } } -void MachineInstr::substituteRegister(unsigned FromReg, - unsigned ToReg, +void MachineInstr::substituteRegister(unsigned FromReg, unsigned ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo) { if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { @@ -941,7 +993,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const { return false; } - if (isPosition() || isDebugValue() || isTerminator() || + if (isPosition() || isDebugInstr() || isTerminator() || hasUnmodeledSideEffects()) return false; @@ -1195,8 +1247,12 @@ LLT MachineInstr::getTypeToPrint(unsigned OpIdx, SmallBitVector &PrintedTypes, if (PrintedTypes[OpInfo.getGenericTypeIndex()]) return LLT{}; - PrintedTypes.set(OpInfo.getGenericTypeIndex()); - return MRI.getType(Op.getReg()); + LLT TypeToPrint = MRI.getType(Op.getReg()); + // Don't mark the type index printed if it wasn't actually printed: maybe + // another operand with the same type index has an actual type attached: + if (TypeToPrint.isValid()) + PrintedTypes.set(OpInfo.getGenericTypeIndex()); + return TypeToPrint; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1206,39 +1262,36 @@ LLVM_DUMP_METHOD void MachineInstr::dump() const { } #endif -void MachineInstr::print(raw_ostream &OS, bool SkipOpers, bool SkipDebugLoc, +void MachineInstr::print(raw_ostream &OS, bool IsStandalone, bool SkipOpers, + bool SkipDebugLoc, bool AddNewLine, const TargetInstrInfo *TII) const { const Module *M = nullptr; - if (const MachineBasicBlock *MBB = getParent()) - if (const MachineFunction *MF = MBB->getParent()) - M = 
MF->getFunction().getParent(); + const Function *F = nullptr; + if (const MachineFunction *MF = getMFIfAvailable(*this)) { + F = &MF->getFunction(); + M = F->getParent(); + if (!TII) + TII = MF->getSubtarget().getInstrInfo(); + } ModuleSlotTracker MST(M); - print(OS, MST, SkipOpers, SkipDebugLoc, TII); + if (F) + MST.incorporateFunction(*F); + print(OS, MST, IsStandalone, SkipOpers, SkipDebugLoc, TII); } void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, - bool SkipOpers, bool SkipDebugLoc, - const TargetInstrInfo *TII) const { + bool IsStandalone, bool SkipOpers, bool SkipDebugLoc, + bool AddNewLine, const TargetInstrInfo *TII) const { // We can be a bit tidier if we know the MachineFunction. const MachineFunction *MF = nullptr; const TargetRegisterInfo *TRI = nullptr; const MachineRegisterInfo *MRI = nullptr; const TargetIntrinsicInfo *IntrinsicInfo = nullptr; + tryToGetTargetInfo(*this, TRI, MRI, IntrinsicInfo, TII); - if (const MachineBasicBlock *MBB = getParent()) { - MF = MBB->getParent(); - if (MF) { - MRI = &MF->getRegInfo(); - TRI = MF->getSubtarget().getRegisterInfo(); - if (!TII) - TII = MF->getSubtarget().getInstrInfo(); - IntrinsicInfo = MF->getTarget().getIntrinsicInfo(); - } - } - - // Save a list of virtual registers. - SmallVector<unsigned, 8> VirtRegs; + if (isCFIInstruction()) + assert(getNumOperands() == 1 && "Expected 1 operand in CFI instruction"); SmallBitVector PrintedTypes(8); bool ShouldPrintRegisterTies = hasComplexRegisterTies(); @@ -1250,26 +1303,47 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, return findTiedOperandIdx(OpIdx); return 0U; }; + unsigned StartOp = 0; + unsigned e = getNumOperands(); + // Print explicitly defined operands on the left of an assignment syntax. - unsigned StartOp = 0, e = getNumOperands(); - for (; StartOp < e && getOperand(StartOp).isReg() && - getOperand(StartOp).isDef() && !getOperand(StartOp).isImplicit(); - ++StartOp) { + while (StartOp < e) { + const MachineOperand &MO = getOperand(StartOp); + if (!MO.isReg() || !MO.isDef() || MO.isImplicit()) + break; + if (StartOp != 0) OS << ", "; + LLT TypeToPrint = MRI ? getTypeToPrint(StartOp, PrintedTypes, *MRI) : LLT{}; unsigned TiedOperandIdx = getTiedOperandIdx(StartOp); - getOperand(StartOp).print(OS, MST, TypeToPrint, /*PrintDef=*/false, - ShouldPrintRegisterTies, TiedOperandIdx, TRI, - IntrinsicInfo); - unsigned Reg = getOperand(StartOp).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) - VirtRegs.push_back(Reg); + MO.print(OS, MST, TypeToPrint, /*PrintDef=*/false, IsStandalone, + ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); + ++StartOp; } if (StartOp != 0) OS << " = "; + if (getFlag(MachineInstr::FrameSetup)) + OS << "frame-setup "; + if (getFlag(MachineInstr::FrameDestroy)) + OS << "frame-destroy "; + if (getFlag(MachineInstr::FmNoNans)) + OS << "nnan "; + if (getFlag(MachineInstr::FmNoInfs)) + OS << "ninf "; + if (getFlag(MachineInstr::FmNsz)) + OS << "nsz "; + if (getFlag(MachineInstr::FmArcp)) + OS << "arcp "; + if (getFlag(MachineInstr::FmContract)) + OS << "contract "; + if (getFlag(MachineInstr::FmAfn)) + OS << "afn "; + if (getFlag(MachineInstr::FmReassoc)) + OS << "reassoc "; + // Print the opcode name. if (TII) OS << TII->getName(getOpcode()); @@ -1290,7 +1364,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, const unsigned OpIdx = InlineAsm::MIOp_AsmString; LLT TypeToPrint = MRI ? 
getTypeToPrint(OpIdx, PrintedTypes, *MRI) : LLT{}; unsigned TiedOperandIdx = getTiedOperandIdx(OpIdx); - getOperand(OpIdx).print(OS, MST, TypeToPrint, /*PrintDef=*/true, + getOperand(OpIdx).print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); @@ -1318,18 +1392,9 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); - if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) - VirtRegs.push_back(MO.getReg()); - if (FirstOp) FirstOp = false; else OS << ","; OS << " "; - if (i < getDesc().NumOperands) { - const MCOperandInfo &MCOI = getDesc().OpInfo[i]; - if (MCOI.isPredicate()) - OS << "pred:"; - if (MCOI.isOptionalDef()) - OS << "opt:"; - } + if (isDebugValue() && MO.isMetadata()) { // Pretty print DBG_VALUE instructions. auto *DIV = dyn_cast<DILocalVariable>(MO.getMetadata()); @@ -1338,12 +1403,20 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, else { LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{}; unsigned TiedOperandIdx = getTiedOperandIdx(i); - MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, + MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, + ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); + } + } else if (isDebugLabel() && MO.isMetadata()) { + // Pretty print DBG_LABEL instructions. + auto *DIL = dyn_cast<DILabel>(MO.getMetadata()); + if (DIL && !DIL->getName().empty()) + OS << "\"" << DIL->getName() << '\"'; + else { + LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{}; + unsigned TiedOperandIdx = getTiedOperandIdx(i); + MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); } - } else if (TRI && (isInsertSubreg() || isRegSequence() || - (isSubregToReg() && i == 3)) && MO.isImm()) { - OS << TRI->getSubRegIndexName(MO.getImm()); } else if (i == AsmDescOp && MO.isImm()) { // Pretty print the inline asm operand descriptor. OS << '$' << AsmOpCount++; @@ -1406,77 +1479,66 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, LLT TypeToPrint = MRI ? 
getTypeToPrint(i, PrintedTypes, *MRI) : LLT{}; unsigned TiedOperandIdx = getTiedOperandIdx(i); if (MO.isImm() && isOperandSubregIdx(i)) - MachineOperand::printSubregIdx(OS, MO.getImm(), TRI); + MachineOperand::printSubRegIdx(OS, MO.getImm(), TRI); else - MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, + MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); } } - bool HaveSemi = false; - const unsigned PrintableFlags = FrameSetup | FrameDestroy; - if (Flags & PrintableFlags) { - if (!HaveSemi) { - OS << ";"; - HaveSemi = true; + if (!SkipDebugLoc) { + if (const DebugLoc &DL = getDebugLoc()) { + if (!FirstOp) + OS << ','; + OS << " debug-location "; + DL->printAsOperand(OS, MST); } - OS << " flags: "; - - if (Flags & FrameSetup) - OS << "FrameSetup"; - - if (Flags & FrameDestroy) - OS << "FrameDestroy"; } if (!memoperands_empty()) { - if (!HaveSemi) { - OS << ";"; - HaveSemi = true; + SmallVector<StringRef, 0> SSNs; + const LLVMContext *Context = nullptr; + std::unique_ptr<LLVMContext> CtxPtr; + const MachineFrameInfo *MFI = nullptr; + if (const MachineFunction *MF = getMFIfAvailable(*this)) { + MFI = &MF->getFrameInfo(); + Context = &MF->getFunction().getContext(); + } else { + CtxPtr = llvm::make_unique<LLVMContext>(); + Context = CtxPtr.get(); } - OS << " mem:"; - for (mmo_iterator i = memoperands_begin(), e = memoperands_end(); - i != e; ++i) { - (*i)->print(OS, MST); - if (std::next(i) != e) - OS << " "; + OS << " :: "; + bool NeedComma = false; + for (const MachineMemOperand *Op : memoperands()) { + if (NeedComma) + OS << ", "; + Op->print(OS, MST, SSNs, *Context, MFI, TII); + NeedComma = true; } } - // Print the regclass of any virtual registers encountered. - if (MRI && !VirtRegs.empty()) { + if (SkipDebugLoc) + return; + + bool HaveSemi = false; + + // Print debug location information. + if (const DebugLoc &DL = getDebugLoc()) { if (!HaveSemi) { - OS << ";"; + OS << ';'; HaveSemi = true; } - for (unsigned i = 0; i != VirtRegs.size(); ++i) { - const RegClassOrRegBank &RC = MRI->getRegClassOrRegBank(VirtRegs[i]); - if (!RC) - continue; - // Generic virtual registers do not have register classes. - if (RC.is<const RegisterBank *>()) - OS << " " << RC.get<const RegisterBank *>()->getName(); - else - OS << " " - << TRI->getRegClassName(RC.get<const TargetRegisterClass *>()); - OS << ':' << printReg(VirtRegs[i]); - for (unsigned j = i+1; j != VirtRegs.size();) { - if (MRI->getRegClassOrRegBank(VirtRegs[j]) != RC) { - ++j; - continue; - } - if (VirtRegs[i] != VirtRegs[j]) - OS << "," << printReg(VirtRegs[j]); - VirtRegs.erase(VirtRegs.begin()+j); - } - } + OS << ' '; + DL.print(OS); } - // Print debug location information. + // Print extra comments for DEBUG_VALUE. 
if (isDebugValue() && getOperand(e - 2).isMetadata()) { - if (!HaveSemi) + if (!HaveSemi) { OS << ";"; + HaveSemi = true; + } auto *DV = cast<DILocalVariable>(getOperand(e - 2).getMetadata()); OS << " line no:" << DV->getLine(); if (auto *InlinedAt = debugLoc->getInlinedAt()) { @@ -1489,16 +1551,11 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, } if (isIndirectDebugValue()) OS << " indirect"; - } else if (SkipDebugLoc) { - return; - } else if (debugLoc && MF) { - if (!HaveSemi) - OS << ";"; - OS << " dbg:"; - debugLoc.print(OS); } + // TODO: DBG_LABEL - OS << '\n'; + if (AddNewLine) + OS << '\n'; } bool MachineInstr::addRegisterKilled(unsigned IncomingReg, @@ -1737,33 +1794,55 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); + auto MIB = BuildMI(MF, DL, MCID).addReg(Reg, RegState::Debug); if (IsIndirect) - return BuildMI(MF, DL, MCID) - .addReg(Reg, RegState::Debug) - .addImm(0U) - .addMetadata(Variable) - .addMetadata(Expr); + MIB.addImm(0U); else - return BuildMI(MF, DL, MCID) - .addReg(Reg, RegState::Debug) - .addReg(0U, RegState::Debug) - .addMetadata(Variable) - .addMetadata(Expr); + MIB.addReg(0U, RegState::Debug); + return MIB.addMetadata(Variable).addMetadata(Expr); } +MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, + const MCInstrDesc &MCID, bool IsIndirect, + MachineOperand &MO, const MDNode *Variable, + const MDNode *Expr) { + assert(isa<DILocalVariable>(Variable) && "not a variable"); + assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); + assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + if (MO.isReg()) + return BuildMI(MF, DL, MCID, IsIndirect, MO.getReg(), Variable, Expr); + + auto MIB = BuildMI(MF, DL, MCID).add(MO); + if (IsIndirect) + MIB.addImm(0U); + else + MIB.addReg(0U, RegState::Debug); + return MIB.addMetadata(Variable).addMetadata(Expr); + } + MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, unsigned Reg, const MDNode *Variable, const MDNode *Expr) { - assert(isa<DILocalVariable>(Variable) && "not a variable"); - assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); MachineFunction &MF = *BB.getParent(); MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, Reg, Variable, Expr); BB.insert(I, MI); return MachineInstrBuilder(MF, MI); } +MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, const MCInstrDesc &MCID, + bool IsIndirect, MachineOperand &MO, + const MDNode *Variable, const MDNode *Expr) { + MachineFunction &MF = *BB.getParent(); + MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, MO, Variable, Expr); + BB.insert(I, MI); + return MachineInstrBuilder(MF, *MI); +} + /// Compute the new DIExpression to use with a DBG_VALUE for a spill slot. /// This prepends DW_OP_deref when spilling an indirect DBG_VALUE. 
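Conceptually the spill rewrite only prepends one DWARF opcode: once the variable's value sits in a stack slot, the DBG_VALUE location is a memory address, so the expression gains a leading DW_OP_deref (opcode 0x06). A tiny standalone sketch of that transformation on a raw opcode vector; this is a model of the idea, not the DIExpression API:

  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Toy model of computeExprForSpill: the spilled location is indirect, so the
  // debug expression starts with a dereference of the spill-slot address.
  static std::vector<uint8_t> exprForSpill(std::vector<uint8_t> Expr) {
    Expr.insert(Expr.begin(), 0x06 /*DW_OP_deref*/);
    return Expr;
  }

  int main() {
    std::vector<uint8_t> Expr = {0x23, 8}; // DW_OP_plus_uconst 8
    std::vector<uint8_t> Spilled = exprForSpill(Expr);
    assert(Spilled.front() == 0x06 && Spilled.size() == 3);
    return 0;
  }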
static const DIExpression *computeExprForSpill(const MachineInstr &MI) { diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index 75d449c7ac6f..7332b7162030 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -71,6 +71,10 @@ SinkInstsToAvoidSpills("sink-insts-to-avoid-spills", cl::desc("MachineLICM should sink instructions into " "loops to avoid register spills"), cl::init(false), cl::Hidden); +static cl::opt<bool> +HoistConstStores("hoist-const-stores", + cl::desc("Hoist invariant stores"), + cl::init(true), cl::Hidden); STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); @@ -82,17 +86,19 @@ STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed"); STATISTIC(NumPostRAHoisted, "Number of machine instructions hoisted out of loops post regalloc"); +STATISTIC(NumStoreConst, + "Number of stores of const phys reg hoisted out of loops"); namespace { - class MachineLICM : public MachineFunctionPass { + class MachineLICMBase : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetLoweringBase *TLI; const TargetRegisterInfo *TRI; const MachineFrameInfo *MFI; MachineRegisterInfo *MRI; TargetSchedModel SchedModel; - bool PreRegAlloc = true; + bool PreRegAlloc; // Various analyses that we use... AliasAnalysis *AA; // Alias analysis info. @@ -138,16 +144,8 @@ namespace { unsigned SpeculationState; public: - static char ID; // Pass identification, replacement for typeid - - MachineLICM() : MachineFunctionPass(ID) { - initializeMachineLICMPass(*PassRegistry::getPassRegistry()); - } - - explicit MachineLICM(bool PreRA) - : MachineFunctionPass(ID), PreRegAlloc(PreRA) { - initializeMachineLICMPass(*PassRegistry::getPassRegistry()); - } + MachineLICMBase(char &PassID, bool PreRegAlloc) + : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -218,7 +216,7 @@ namespace { DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren, DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap); - void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); + void HoistOutOfLoop(MachineDomTreeNode *HeaderN); void HoistRegion(MachineDomTreeNode *N, bool IsHeader); @@ -252,11 +250,29 @@ namespace { MachineBasicBlock *getCurPreheader(); }; + class MachineLICM : public MachineLICMBase { + public: + static char ID; + MachineLICM() : MachineLICMBase(ID, false) { + initializeMachineLICMPass(*PassRegistry::getPassRegistry()); + } + }; + + class EarlyMachineLICM : public MachineLICMBase { + public: + static char ID; + EarlyMachineLICM() : MachineLICMBase(ID, true) { + initializeEarlyMachineLICMPass(*PassRegistry::getPassRegistry()); + } + }; + } // end anonymous namespace -char MachineLICM::ID = 0; +char MachineLICM::ID; +char EarlyMachineLICM::ID; char &llvm::MachineLICMID = MachineLICM::ID; +char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID; INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE, "Machine Loop Invariant Code Motion", false, false) @@ -266,6 +282,14 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE, "Machine Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm", + "Early Machine Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(EarlyMachineLICM, 
"early-machinelicm", + "Early Machine Loop Invariant Code Motion", false, false) + /// Test if the given loop is the outer-most loop that has a unique predecessor. static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { // Check whether this loop even has a unique predecessor. @@ -279,7 +303,7 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { return true; } -bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { +bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -290,15 +314,15 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { TRI = ST.getRegisterInfo(); MFI = &MF.getFrameInfo(); MRI = &MF.getRegInfo(); - SchedModel.init(ST.getSchedModel(), &ST, TII); + SchedModel.init(&ST); PreRegAlloc = MRI->isSSA(); if (PreRegAlloc) - DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); + LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); else - DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); - DEBUG(dbgs() << MF.getName() << " ********\n"); + LLVM_DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); + LLVM_DEBUG(dbgs() << MF.getName() << " ********\n"); if (PreRegAlloc) { // Estimate register pressure during pre-regalloc pass. @@ -350,6 +374,10 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { /// Return true if instruction stores to the specified frame. static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { + // Check mayStore before memory operands so that e.g. DBG_VALUEs will return + // true since they have no memory operands. + if (!MI->mayStore()) + return false; // If we lost memory operands, conservatively assume that the instruction // writes to all slots. if (MI->memoperands_empty()) @@ -368,11 +396,11 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { /// Examine the instruction for potentai LICM candidate. Also /// gather register def and frame object update information. -void MachineLICM::ProcessMI(MachineInstr *MI, - BitVector &PhysRegDefs, - BitVector &PhysRegClobbers, - SmallSet<int, 32> &StoredFIs, - SmallVectorImpl<CandidateInfo> &Candidates) { +void MachineLICMBase::ProcessMI(MachineInstr *MI, + BitVector &PhysRegDefs, + BitVector &PhysRegClobbers, + SmallSet<int, 32> &StoredFIs, + SmallVectorImpl<CandidateInfo> &Candidates) { bool RuledOut = false; bool HasNonInvariantUse = false; unsigned Def = 0; @@ -455,7 +483,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI, /// Walk the specified region of the CFG and hoist loop invariants out to the /// preheader. -void MachineLICM::HoistRegionPostRA() { +void MachineLICMBase::HoistRegionPostRA() { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) return; @@ -541,7 +569,7 @@ void MachineLICM::HoistRegionPostRA() { /// Add register 'Reg' to the livein sets of BBs in the current loop, and make /// sure it is not killed by any instructions in the loop. -void MachineLICM::AddToLiveIns(unsigned Reg) { +void MachineLICMBase::AddToLiveIns(unsigned Reg) { const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); for (MachineBasicBlock *BB : Blocks) { if (!BB->isLiveIn(Reg)) @@ -558,13 +586,14 @@ void MachineLICM::AddToLiveIns(unsigned Reg) { /// When an instruction is found to only use loop invariant operands that is /// safe to hoist, this instruction is called to do the dirty work. 
-void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { +void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) { MachineBasicBlock *Preheader = getCurPreheader(); // Now move the instructions to the predecessor, inserting it before any // terminator instructions. - DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader) << " from " - << printMBBReference(*MI->getParent()) << ": " << *MI); + LLVM_DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader) + << " from " << printMBBReference(*MI->getParent()) << ": " + << *MI); // Splice the instruction to the preheader. MachineBasicBlock *MBB = MI->getParent(); @@ -581,7 +610,7 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { /// Check if this mbb is guaranteed to execute. If not then a load from this mbb /// may not be safe to hoist. -bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { +bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) { if (SpeculationState != SpeculateUnknown) return SpeculationState == SpeculateFalse; @@ -600,24 +629,24 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { return true; } -void MachineLICM::EnterScope(MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n'); +void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) { + LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n'); // Remember livein register pressure. BackTrace.push_back(RegPressure); } -void MachineLICM::ExitScope(MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n'); +void MachineLICMBase::ExitScope(MachineBasicBlock *MBB) { + LLVM_DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n'); BackTrace.pop_back(); } /// Destroy scope for the MBB that corresponds to the given dominator tree node /// if its a leaf or all of its children are done. Walk up the dominator tree to /// destroy ancestors which are now done. -void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, - DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { +void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, + DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { if (OpenChildren[Node]) return; @@ -638,7 +667,7 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, /// specified header block, and that are in the current loop) in depth first /// order w.r.t the DominatorTree. This allows us to visit definitions before /// uses, allowing us to hoist a loop body in one pass without iteration. -void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { +void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) return; @@ -708,6 +737,8 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { MachineInstr *MI = &*MII; if (!Hoist(MI, Preheader)) UpdateRegPressure(MI); + // If we have hoisted an instruction that may store, it can only be a + // constant store. MII = NextMII; } @@ -719,7 +750,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { /// Sink instructions into loops if profitable. This especially tries to prevent /// register spills caused by register pressure if there is little to no /// overhead moving instructions into loops. 
-void MachineLICM::SinkIntoLoop() { +void MachineLICMBase::SinkIntoLoop() { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) return; @@ -773,7 +804,7 @@ static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { /// Find all virtual register references that are liveout of the preheader to /// initialize the starting "register pressure". Note this does not count live /// through (livein but not used) registers. -void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { +void MachineLICMBase::InitRegPressure(MachineBasicBlock *BB) { std::fill(RegPressure.begin(), RegPressure.end(), 0); // If the preheader has only a single predecessor and it ends with a @@ -792,8 +823,8 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { } /// Update estimate of register pressure after the specified instruction. -void MachineLICM::UpdateRegPressure(const MachineInstr *MI, - bool ConsiderUnseenAsDef) { +void MachineLICMBase::UpdateRegPressure(const MachineInstr *MI, + bool ConsiderUnseenAsDef) { auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef); for (const auto &RPIdAndCost : Cost) { unsigned Class = RPIdAndCost.first; @@ -811,8 +842,8 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI, /// figure out which usages are live-ins. /// FIXME: Figure out a way to consider 'RegSeen' from all code paths. DenseMap<unsigned, int> -MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, - bool ConsiderUnseenAsDef) { +MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, + bool ConsiderUnseenAsDef) { DenseMap<unsigned, int> Cost; if (MI->isImplicitDef()) return Cost; @@ -871,13 +902,86 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { return false; } +// This function iterates through all the operands of the input store MI and +// checks that each register operand statisfies isCallerPreservedPhysReg. +// This means, the value being stored and the address where it is being stored +// is constant throughout the body of the function (not including prologue and +// epilogue). When called with an MI that isn't a store, it returns false. +// A future improvement can be to check if the store registers are constant +// throughout the loop rather than throughout the funtion. +static bool isInvariantStore(const MachineInstr &MI, + const TargetRegisterInfo *TRI, + const MachineRegisterInfo *MRI) { + + bool FoundCallerPresReg = false; + if (!MI.mayStore() || MI.hasUnmodeledSideEffects() || + (MI.getNumOperands() == 0)) + return false; + + // Check that all register operands are caller-preserved physical registers. + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + // If operand is a virtual register, check if it comes from a copy of a + // physical register. + if (TargetRegisterInfo::isVirtualRegister(Reg)) + Reg = TRI->lookThruCopyLike(MO.getReg(), MRI); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + if (!TRI->isCallerPreservedPhysReg(Reg, *MI.getMF())) + return false; + else + FoundCallerPresReg = true; + } else if (!MO.isImm()) { + return false; + } + } + return FoundCallerPresReg; +} + +// Return true if the input MI is a copy instruction that feeds an invariant +// store instruction. This means that the src of the copy has to satisfy +// isCallerPreservedPhysReg and atleast one of it's users should satisfy +// isInvariantStore. 
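The new hoist-const-stores logic treats a store as loop invariant when every register it reads can be traced, possibly through a COPY, to a caller-preserved physical register whose value is constant for the whole function (for example a TOC or base pointer), and isCopyFeedingInvariantStore then lets the feeding COPY be hoisted as well. A small standalone model of that check; the register names and the CallerPreservedConst table are invented for illustration and stand in for TRI->isCallerPreservedPhysReg and TRI->lookThruCopyLike:

  #include <cassert>
  #include <map>
  #include <string>
  #include <vector>

  // Which physical registers are caller-preserved and constant for the
  // function body (illustrative data only).
  static const std::map<std::string, bool> CallerPreservedConst = {
      {"$x2", true}, {"$x3", false}};

  static std::string
  lookThruCopy(const std::string &Reg,
               const std::map<std::string, std::string> &Copies) {
    auto It = Copies.find(Reg);
    return It == Copies.end() ? Reg : It->second;
  }

  // A store is "invariant" if it has register operands and each of them
  // resolves, through at most one copy, to a constant caller-preserved reg.
  static bool isInvariantStore(const std::vector<std::string> &RegOps,
                               const std::map<std::string, std::string> &Copies) {
    if (RegOps.empty())
      return false;
    for (const std::string &R : RegOps) {
      std::string Phys = lookThruCopy(R, Copies);
      auto It = CallerPreservedConst.find(Phys);
      if (It == CallerPreservedConst.end() || !It->second)
        return false;
    }
    return true;
  }

  int main() {
    // %v0 = COPY $x2; STORE %v0, $x2  -> both operands trace back to $x2.
    std::map<std::string, std::string> Copies = {{"%v0", "$x2"}};
    assert(isInvariantStore({"%v0", "$x2"}, Copies));
    assert(!isInvariantStore({"%v1", "$x3"}, Copies)); // unknown vreg, clobbered phys reg
    return 0;
  }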
+static bool isCopyFeedingInvariantStore(const MachineInstr &MI, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI) { + + // FIXME: If targets would like to look through instructions that aren't + // pure copies, this can be updated to a query. + if (!MI.isCopy()) + return false; + + const MachineFunction *MF = MI.getMF(); + // Check that we are copying a constant physical register. + unsigned CopySrcReg = MI.getOperand(1).getReg(); + if (TargetRegisterInfo::isVirtualRegister(CopySrcReg)) + return false; + + if (!TRI->isCallerPreservedPhysReg(CopySrcReg, *MF)) + return false; + + unsigned CopyDstReg = MI.getOperand(0).getReg(); + // Check if any of the uses of the copy are invariant stores. + assert (TargetRegisterInfo::isVirtualRegister(CopyDstReg) && + "copy dst is not a virtual reg"); + + for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) { + if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI)) + return true; + } + return false; +} + /// Returns true if the instruction may be a suitable candidate for LICM. /// e.g. If the instruction is a call, then it's obviously not safe to hoist it. -bool MachineLICM::IsLICMCandidate(MachineInstr &I) { +bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) { // Check if it's safe to move the instruction. bool DontMoveAcrossStore = true; - if (!I.isSafeToMove(AA, DontMoveAcrossStore)) + if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) && + !(HoistConstStores && isInvariantStore(I, TRI, MRI))) { return false; + } // If it is load then check if it is guaranteed to execute by making sure that // it dominates all exiting blocks. If it doesn't, then there is a path out of @@ -896,7 +1000,7 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { /// I.e., all virtual register operands are defined outside of the loop, /// physical registers aren't accessed explicitly, and there are no side /// effects that aren't captured by the operands or other flags. -bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { +bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) { if (!IsLICMCandidate(I)) return false; @@ -949,7 +1053,7 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { /// Return true if the specified instruction is used by a phi node and hoisting /// it could cause a copy to be inserted. -bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { +bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const { SmallVector<const MachineInstr*, 8> Work(1, MI); do { MI = Work.pop_back_val(); @@ -984,8 +1088,9 @@ bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { /// Compute operand latency between a def of 'Reg' and an use in the current /// loop, return true if the target considered it high. -bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, - unsigned DefIdx, unsigned Reg) const { +bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, + unsigned DefIdx, + unsigned Reg) const { if (MRI->use_nodbg_empty(Reg)) return false; @@ -1015,7 +1120,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, /// Return true if the instruction is marked "cheap" or the operand latency /// between its def and a use is one or less. 
-bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { +bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const { if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike()) return true; @@ -1040,8 +1145,9 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { /// Visit BBs from header to current BB, check if hoisting an instruction of the /// given cost matrix can cause high register pressure. -bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost, - bool CheapInstr) { +bool +MachineLICMBase::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost, + bool CheapInstr) { for (const auto &RPIdAndCost : Cost) { if (RPIdAndCost.second <= 0) continue; @@ -1065,7 +1171,7 @@ bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost, /// Traverse the back trace from header to the current block and update their /// register pressures to reflect the effect of hoisting MI from the current /// block to the preheader. -void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { +void MachineLICMBase::UpdateBackTraceRegPressure(const MachineInstr *MI) { // First compute the 'cost' of the instruction, i.e. its contribution // to register pressure. auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false, @@ -1079,7 +1185,7 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { /// Return true if it is potentially profitable to hoist the given loop /// invariant. -bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { +bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) { if (MI.isImplicitDef()) return true; @@ -1095,12 +1201,15 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { // - When hoisting the last use of a value in the loop, that value no longer // needs to be live in the loop. This lowers register pressure in the loop. + if (HoistConstStores && isCopyFeedingInvariantStore(MI, MRI, TRI)) + return true; + bool CheapInstr = IsCheapInstruction(MI); bool CreatesCopy = HasLoopPHIUse(&MI); // Don't hoist a cheap instruction if it would create a copy in the loop. if (CheapInstr && CreatesCopy) { - DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI); + LLVM_DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI); return false; } @@ -1119,7 +1228,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) { - DEBUG(dbgs() << "Hoist High Latency: " << MI); + LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI); ++NumHighLatency; return true; } @@ -1137,14 +1246,14 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { // Visit BBs from header to current BB, if hoisting this doesn't cause // high register pressure, then it's safe to proceed. if (!CanCauseHighRegPressure(Cost, CheapInstr)) { - DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI); + LLVM_DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI); ++NumLowRP; return true; } // Don't risk increasing register pressure if it would create copies. if (CreatesCopy) { - DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI); + LLVM_DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI); return false; } @@ -1153,7 +1262,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { // conservative. 
if (AvoidSpeculation && (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) { - DEBUG(dbgs() << "Won't speculate: " << MI); + LLVM_DEBUG(dbgs() << "Won't speculate: " << MI); return false; } @@ -1161,7 +1270,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { // to be remat'ed. if (!TII->isTriviallyReMaterializable(MI, AA) && !MI.isDereferenceableInvariantLoad(AA)) { - DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); + LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); return false; } @@ -1171,7 +1280,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { /// Unfold a load from the given machineinstr if the load itself could be /// hoisted. Return the unfolded and hoistable load, or null if the load /// couldn't be unfolded or if it wouldn't be hoistable. -MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { +MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) { // Don't unfold simple loads. if (MI->canFoldAsLoad()) return nullptr; @@ -1229,7 +1338,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { /// Initialize the CSE map with instructions that are in the current loop /// preheader that may become duplicates of instructions that are hoisted /// out of the loop. -void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { +void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) { for (MachineInstr &MI : *BB) CSEMap[MI.getOpcode()].push_back(&MI); } @@ -1237,8 +1346,8 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { /// Find an instruction amount PrevMIs that is a duplicate of MI. /// Return this instruction if it's found. const MachineInstr* -MachineLICM::LookForDuplicate(const MachineInstr *MI, - std::vector<const MachineInstr*> &PrevMIs) { +MachineLICMBase::LookForDuplicate(const MachineInstr *MI, + std::vector<const MachineInstr*> &PrevMIs) { for (const MachineInstr *PrevMI : PrevMIs) if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr))) return PrevMI; @@ -1250,15 +1359,15 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI, /// computes the same value. If it's found, do a RAU on with the definition of /// the existing instruction rather than hoisting the instruction to the /// preheader. -bool MachineLICM::EliminateCSE(MachineInstr *MI, - DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI) { +bool MachineLICMBase::EliminateCSE(MachineInstr *MI, + DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI) { // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate // the undef property onto uses. if (CI == CSEMap.end() || MI->isImplicitDef()) return false; if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { - DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); + LLVM_DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); // Replace virtual registers defined by MI by their counterparts defined // by Dup. @@ -1308,7 +1417,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, /// Return true if the given instruction will be CSE'd if it's hoisted out of /// the loop. -bool MachineLICM::MayCSE(MachineInstr *MI) { +bool MachineLICMBase::MayCSE(MachineInstr *MI) { unsigned Opcode = MI->getOpcode(); DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator CI = CSEMap.find(Opcode); @@ -1323,7 +1432,7 @@ bool MachineLICM::MayCSE(MachineInstr *MI) { /// When an instruction is found to use only loop invariant operands /// that are safe to hoist, this instruction is called to do the dirty work. 
/// It returns true if the instruction is hoisted. -bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { +bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // First check whether we should hoist this instruction. if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { // If not, try unfolding a hoistable load. @@ -1331,16 +1440,21 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { if (!MI) return false; } + // If we have hoisted an instruction that may store, it can only be a constant + // store. + if (MI->mayStore()) + NumStoreConst++; + // Now move the instructions to the predecessor, inserting it before any // terminator instructions. - DEBUG({ - dbgs() << "Hoisting " << *MI; - if (MI->getParent()->getBasicBlock()) - dbgs() << " from " << printMBBReference(*MI->getParent()); - if (Preheader->getBasicBlock()) - dbgs() << " to " << printMBBReference(*Preheader); - dbgs() << "\n"; - }); + LLVM_DEBUG({ + dbgs() << "Hoisting " << *MI; + if (MI->getParent()->getBasicBlock()) + dbgs() << " from " << printMBBReference(*MI->getParent()); + if (Preheader->getBasicBlock()) + dbgs() << " to " << printMBBReference(*Preheader); + dbgs() << "\n"; + }); // If this is the first instruction being hoisted to the preheader, // initialize the CSE map with potential common expressions. @@ -1386,7 +1500,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { } /// Get the preheader for the current loop, splitting a critical edge if needed. -MachineBasicBlock *MachineLICM::getCurPreheader() { +MachineBasicBlock *MachineLICMBase::getCurPreheader() { // Determine the block to which to hoist instructions. If we can't find a // suitable loop predecessor, we can't do any hoisting. 
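To give a source-level intuition for the HoistConstStores path added throughout this file, the sketch below shows the shape of transformation it enables. The function and variable names are invented for illustration, and the real check (isInvariantStore above) is stricter than the C++ picture suggests: it only fires when the stored value and the store address live in caller-preserved physical registers for the whole function. Treat this as a conceptual before/after, not a promise that the pass fires on this exact code.

// Illustrative only; names are not from the patch.
void before(int *Flag, int *A, int N) {
  for (int I = 0; I < N; ++I) {
    *Flag = 1;   // value and address are identical on every iteration
    A[I] += 2;   // the only per-iteration work
  }
}

void after(int *Flag, int *A, int N) {
  if (N > 0)     // stands in for the pass's IsGuaranteedToExecute reasoning
    *Flag = 1;   // hoisted to the loop preheader; counted by NumStoreConst
  for (int I = 0; I < N; ++I)
    A[I] += 2;
}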
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp index a9aa1d954e70..2bce59235057 100644 --- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Config/llvm-config.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index 8f0b89657d02..054cc97f8374 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/TinyPtrVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" @@ -27,6 +26,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> diff --git a/contrib/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm/lib/CodeGen/MachineOperand.cpp index ec81c6391171..8098333832b4 100644 --- a/contrib/llvm/lib/CodeGen/MachineOperand.cpp +++ b/contrib/llvm/lib/CodeGen/MachineOperand.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/Loads.h" #include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -19,6 +20,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/ModuleSlotTracker.h" @@ -50,6 +52,9 @@ void MachineOperand::setReg(unsigned Reg) { if (getReg() == Reg) return; // No change. + // Clear the IsRenamable bit to keep it conservatively correct. + IsRenamable = false; + // Otherwise, we have to change the register. If this operand is embedded // into a machine function, we need to update the old and new register's // use/def lists. 
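A brief caller-side sketch of what the new conservative clear in setReg() means in practice; MO, NewReg and KnownStillRenamable are placeholders, not code from the patch. Because the renamable flag describes a property of the current register, retargeting the operand now silently drops it, and a caller that still knows the property holds has to opt back in through setIsRenamable() (adjusted in the next hunk).

  MO.setReg(NewReg);          // IsRenamable is cleared here unconditionally
  if (KnownStillRenamable)    // placeholder for whatever the caller has proved
    MO.setIsRenamable(true);  // explicitly re-assert renamability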
@@ -110,30 +115,27 @@ bool MachineOperand::isRenamable() const { assert(isReg() && "Wrong MachineOperand accessor"); assert(TargetRegisterInfo::isPhysicalRegister(getReg()) && "isRenamable should only be checked on physical registers"); - return IsRenamable; + if (!IsRenamable) + return false; + + const MachineInstr *MI = getParent(); + if (!MI) + return true; + + if (isDef()) + return !MI->hasExtraDefRegAllocReq(MachineInstr::IgnoreBundle); + + assert(isUse() && "Reg is not def or use"); + return !MI->hasExtraSrcRegAllocReq(MachineInstr::IgnoreBundle); } void MachineOperand::setIsRenamable(bool Val) { assert(isReg() && "Wrong MachineOperand accessor"); assert(TargetRegisterInfo::isPhysicalRegister(getReg()) && "setIsRenamable should only be called on physical registers"); - if (const MachineInstr *MI = getParent()) - if ((isDef() && MI->hasExtraDefRegAllocReq()) || - (isUse() && MI->hasExtraSrcRegAllocReq())) - assert(!Val && "isRenamable should be false for " - "hasExtraDefRegAllocReq/hasExtraSrcRegAllocReq opcodes"); IsRenamable = Val; } -void MachineOperand::setIsRenamableIfNoExtraRegAllocReq() { - if (const MachineInstr *MI = getParent()) - if ((isDef() && MI->hasExtraDefRegAllocReq()) || - (isUse() && MI->hasExtraSrcRegAllocReq())) - return; - - setIsRenamable(true); -} - // If this operand is currently a register operand, and if this is in a // function, deregister the operand from the register's use/def list. void MachineOperand::removeRegFromUses() { @@ -440,7 +442,70 @@ static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB, OS << "<unknown>"; } -void MachineOperand::printSubregIdx(raw_ostream &OS, uint64_t Index, +static void printIRValueReference(raw_ostream &OS, const Value &V, + ModuleSlotTracker &MST) { + if (isa<GlobalValue>(V)) { + V.printAsOperand(OS, /*PrintType=*/false, MST); + return; + } + if (isa<Constant>(V)) { + // Machine memory operands can load/store to/from constant value pointers. 
+ OS << '`'; + V.printAsOperand(OS, /*PrintType=*/true, MST); + OS << '`'; + return; + } + OS << "%ir."; + if (V.hasName()) { + printLLVMNameWithoutPrefix(OS, V.getName()); + return; + } + MachineOperand::printIRSlotNumber(OS, MST.getLocalSlot(&V)); +} + +static void printSyncScope(raw_ostream &OS, const LLVMContext &Context, + SyncScope::ID SSID, + SmallVectorImpl<StringRef> &SSNs) { + switch (SSID) { + case SyncScope::System: + break; + default: + if (SSNs.empty()) + Context.getSyncScopeNames(SSNs); + + OS << "syncscope(\""; + printEscapedString(SSNs[SSID], OS); + OS << "\") "; + break; + } +} + +static const char *getTargetMMOFlagName(const TargetInstrInfo &TII, + unsigned TMMOFlag) { + auto Flags = TII.getSerializableMachineMemOperandTargetFlags(); + for (const auto &I : Flags) { + if (I.first == TMMOFlag) { + return I.second; + } + } + return nullptr; +} + +static void printFrameIndex(raw_ostream& OS, int FrameIndex, bool IsFixed, + const MachineFrameInfo *MFI) { + StringRef Name; + if (MFI) { + IsFixed = MFI->isFixedObjectIndex(FrameIndex); + if (const AllocaInst *Alloca = MFI->getObjectAllocation(FrameIndex)) + if (Alloca->hasName()) + Name = Alloca->getName(); + if (IsFixed) + FrameIndex -= MFI->getObjectIndexBegin(); + } + MachineOperand::printStackObjectReference(OS, FrameIndex, IsFixed, Name); +} + +void MachineOperand::printSubRegIdx(raw_ostream &OS, uint64_t Index, const TargetRegisterInfo *TRI) { OS << "%subreg."; if (TRI) @@ -639,15 +704,21 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI, void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI, const TargetIntrinsicInfo *IntrinsicInfo) const { + print(OS, LLT{}, TRI, IntrinsicInfo); +} + +void MachineOperand::print(raw_ostream &OS, LLT TypeToPrint, + const TargetRegisterInfo *TRI, + const TargetIntrinsicInfo *IntrinsicInfo) const { tryToGetTargetInfo(*this, TRI, IntrinsicInfo); ModuleSlotTracker DummyMST(nullptr); - print(OS, DummyMST, LLT{}, /*PrintDef=*/false, + print(OS, DummyMST, TypeToPrint, /*PrintDef=*/false, /*IsStandalone=*/true, /*ShouldPrintRegisterTies=*/true, /*TiedOperandIdx=*/0, TRI, IntrinsicInfo); } void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, - LLT TypeToPrint, bool PrintDef, + LLT TypeToPrint, bool PrintDef, bool IsStandalone, bool ShouldPrintRegisterTies, unsigned TiedOperandIdx, const TargetRegisterInfo *TRI, @@ -675,7 +746,15 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "debug-use "; if (TargetRegisterInfo::isPhysicalRegister(getReg()) && isRenamable()) OS << "renamable "; - OS << printReg(Reg, TRI); + + const MachineRegisterInfo *MRI = nullptr; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (const MachineFunction *MF = getMFIfAvailable(*this)) { + MRI = &MF->getRegInfo(); + } + } + + OS << printReg(Reg, TRI, 0, MRI); // Print the sub register. 
if (unsigned SubReg = getSubReg()) { if (TRI) @@ -687,7 +766,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, if (TargetRegisterInfo::isVirtualRegister(Reg)) { if (const MachineFunction *MF = getMFIfAvailable(*this)) { const MachineRegisterInfo &MRI = MF->getRegInfo(); - if (!PrintDef || MRI.def_empty(Reg)) { + if (IsStandalone || !PrintDef || MRI.def_empty(Reg)) { OS << ':'; OS << printRegClassOrBank(Reg, MRI, TRI); } @@ -716,17 +795,10 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, case MachineOperand::MO_FrameIndex: { int FrameIndex = getIndex(); bool IsFixed = false; - StringRef Name; - if (const MachineFunction *MF = getMFIfAvailable(*this)) { - const MachineFrameInfo &MFI = MF->getFrameInfo(); - IsFixed = MFI.isFixedObjectIndex(FrameIndex); - if (const AllocaInst *Alloca = MFI.getObjectAllocation(FrameIndex)) - if (Alloca->hasName()) - Name = Alloca->getName(); - if (IsFixed) - FrameIndex -= MFI.getObjectIndexBegin(); - } - printStackObjectReference(OS, FrameIndex, IsFixed, Name); + const MachineFrameInfo *MFI = nullptr; + if (const MachineFunction *MF = getMFIfAvailable(*this)) + MFI = &MF->getFrameInfo(); + printFrameIndex(OS, FrameIndex, IsFixed, MFI); break; } case MachineOperand::MO_ConstantPoolIndex: @@ -752,7 +824,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, break; case MachineOperand::MO_ExternalSymbol: { StringRef Name = getSymbolName(); - OS << '$'; + OS << '&'; if (Name.empty()) { OS << "\"\""; } else { @@ -905,7 +977,7 @@ MachinePointerInfo MachinePointerInfo::getUnknownStack(MachineFunction &MF) { } MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, - uint64_t s, unsigned int a, + uint64_t s, uint64_t a, const AAMDNodes &AAInfo, const MDNode *Ranges, SyncScope::ID SSID, AtomicOrdering Ordering, @@ -961,108 +1033,121 @@ void MachineMemOperand::print(raw_ostream &OS) const { ModuleSlotTracker DummyMST(nullptr); print(OS, DummyMST); } + void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { - assert((isLoad() || isStore()) && "SV has to be a load, store or both."); + SmallVector<StringRef, 0> SSNs; + LLVMContext Ctx; + print(OS, MST, SSNs, Ctx, nullptr, nullptr); +} +void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, + SmallVectorImpl<StringRef> &SSNs, + const LLVMContext &Context, + const MachineFrameInfo *MFI, + const TargetInstrInfo *TII) const { + OS << '('; if (isVolatile()) - OS << "Volatile "; - + OS << "volatile "; + if (isNonTemporal()) + OS << "non-temporal "; + if (isDereferenceable()) + OS << "dereferenceable "; + if (isInvariant()) + OS << "invariant "; + if (getFlags() & MachineMemOperand::MOTargetFlag1) + OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag1) + << "\" "; + if (getFlags() & MachineMemOperand::MOTargetFlag2) + OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag2) + << "\" "; + if (getFlags() & MachineMemOperand::MOTargetFlag3) + OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag3) + << "\" "; + + assert((isLoad() || isStore()) && + "machine memory operand must be a load or store (or both)"); if (isLoad()) - OS << "LD"; + OS << "load "; if (isStore()) - OS << "ST"; - OS << getSize(); + OS << "store "; - // Print the address information. 
- OS << "["; - if (const Value *V = getValue()) - V->printAsOperand(OS, /*PrintType=*/false, MST); - else if (const PseudoSourceValue *PSV = getPseudoValue()) - PSV->printCustom(OS); - else - OS << "<unknown>"; + printSyncScope(OS, Context, getSyncScopeID(), SSNs); - unsigned AS = getAddrSpace(); - if (AS != 0) - OS << "(addrspace=" << AS << ')'; - - // If the alignment of the memory reference itself differs from the alignment - // of the base pointer, print the base alignment explicitly, next to the base - // pointer. - if (getBaseAlignment() != getAlignment()) - OS << "(align=" << getBaseAlignment() << ")"; - - if (getOffset() != 0) - OS << "+" << getOffset(); - OS << "]"; - - // Print the alignment of the reference. - if (getBaseAlignment() != getAlignment() || getBaseAlignment() != getSize()) - OS << "(align=" << getAlignment() << ")"; - - // Print TBAA info. - if (const MDNode *TBAAInfo = getAAInfo().TBAA) { - OS << "(tbaa="; - if (TBAAInfo->getNumOperands() > 0) - TBAAInfo->getOperand(0)->printAsOperand(OS, MST); - else - OS << "<unknown>"; - OS << ")"; - } + if (getOrdering() != AtomicOrdering::NotAtomic) + OS << toIRString(getOrdering()) << ' '; + if (getFailureOrdering() != AtomicOrdering::NotAtomic) + OS << toIRString(getFailureOrdering()) << ' '; - // Print AA scope info. - if (const MDNode *ScopeInfo = getAAInfo().Scope) { - OS << "(alias.scope="; - if (ScopeInfo->getNumOperands() > 0) - for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) { - ScopeInfo->getOperand(i)->printAsOperand(OS, MST); - if (i != ie - 1) - OS << ","; - } - else - OS << "<unknown>"; - OS << ")"; + OS << getSize(); + if (const Value *Val = getValue()) { + OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into "); + printIRValueReference(OS, *Val, MST); + } else if (const PseudoSourceValue *PVal = getPseudoValue()) { + OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into "); + assert(PVal && "Expected a pseudo source value"); + switch (PVal->kind()) { + case PseudoSourceValue::Stack: + OS << "stack"; + break; + case PseudoSourceValue::GOT: + OS << "got"; + break; + case PseudoSourceValue::JumpTable: + OS << "jump-table"; + break; + case PseudoSourceValue::ConstantPool: + OS << "constant-pool"; + break; + case PseudoSourceValue::FixedStack: { + int FrameIndex = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex(); + bool IsFixed = true; + printFrameIndex(OS, FrameIndex, IsFixed, MFI); + break; + } + case PseudoSourceValue::GlobalValueCallEntry: + OS << "call-entry "; + cast<GlobalValuePseudoSourceValue>(PVal)->getValue()->printAsOperand( + OS, /*PrintType=*/false, MST); + break; + case PseudoSourceValue::ExternalSymbolCallEntry: + OS << "call-entry &"; + printLLVMNameWithoutPrefix( + OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol()); + break; + case PseudoSourceValue::TargetCustom: + // FIXME: This is not necessarily the correct MIR serialization format for + // a custom pseudo source value, but at least it allows + // -print-machineinstrs to work on a target with custom pseudo source + // values. + OS << "custom "; + PVal->printCustom(OS); + break; + } } - - // Print AA noalias scope info. 
- if (const MDNode *NoAliasInfo = getAAInfo().NoAlias) { - OS << "(noalias="; - if (NoAliasInfo->getNumOperands() > 0) - for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) { - NoAliasInfo->getOperand(i)->printAsOperand(OS, MST); - if (i != ie - 1) - OS << ","; - } - else - OS << "<unknown>"; - OS << ")"; + MachineOperand::printOperandOffset(OS, getOffset()); + if (getBaseAlignment() != getSize()) + OS << ", align " << getBaseAlignment(); + auto AAInfo = getAAInfo(); + if (AAInfo.TBAA) { + OS << ", !tbaa "; + AAInfo.TBAA->printAsOperand(OS, MST); } - - if (const MDNode *Ranges = getRanges()) { - unsigned NumRanges = Ranges->getNumOperands(); - if (NumRanges != 0) { - OS << "(ranges="; - - for (unsigned I = 0; I != NumRanges; ++I) { - Ranges->getOperand(I)->printAsOperand(OS, MST); - if (I != NumRanges - 1) - OS << ','; - } - - OS << ')'; - } + if (AAInfo.Scope) { + OS << ", !alias.scope "; + AAInfo.Scope->printAsOperand(OS, MST); } + if (AAInfo.NoAlias) { + OS << ", !noalias "; + AAInfo.NoAlias->printAsOperand(OS, MST); + } + if (getRanges()) { + OS << ", !range "; + getRanges()->printAsOperand(OS, MST); + } + // FIXME: Implement addrspace printing/parsing in MIR. + // For now, print this even though parsing it is not available in MIR. + if (unsigned AS = getAddrSpace()) + OS << ", addrspace " << AS; - if (isNonTemporal()) - OS << "(nontemporal)"; - if (isDereferenceable()) - OS << "(dereferenceable)"; - if (isInvariant()) - OS << "(invariant)"; - if (getFlags() & MOTargetFlag1) - OS << "(flag1)"; - if (getFlags() & MOTargetFlag2) - OS << "(flag2)"; - if (getFlags() & MOTargetFlag3) - OS << "(flag3)"; + OS << ')'; } diff --git a/contrib/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/contrib/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index ca4452218da1..906d5560d568 100644 --- a/contrib/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -27,7 +27,8 @@ DiagnosticInfoMIROptimization::MachineArgument::MachineArgument( Key = MKey; raw_string_ostream OS(Val); - MI.print(OS, /*SkipOpers=*/false, /*SkipDebugLoc=*/true); + MI.print(OS, /*IsStandalone=*/true, /*SkipOpers=*/false, + /*SkipDebugLoc=*/true); } Optional<uint64_t> diff --git a/contrib/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm/lib/CodeGen/MachineOutliner.cpp index e4eb8802ac66..28e4e2c6c87a 100644 --- a/contrib/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/contrib/llvm/lib/CodeGen/MachineOutliner.cpp @@ -25,9 +25,8 @@ /// /// Targets must implement /// * getOutliningCandidateInfo -/// * insertOutlinerEpilogue +/// * buildOutlinedFrame /// * insertOutlinedCall -/// * insertOutlinerPrologue /// * isFunctionSafeToOutlineFrom /// /// in order to make use of the MachineOutliner. 
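As a reading aid for the hook list above, this is roughly what a backend opting into the outliner declares in its TargetInstrInfo subclass after this change. MyTargetInstrInfo is a placeholder, and the signatures are reconstructed from the call sites later in this diff, so they are an approximation; the authoritative prototypes are in TargetInstrInfo.h.

// Sketch only; includes of TargetInstrInfo.h / MachineOutliner.h omitted.
class MyTargetInstrInfo : public TargetInstrInfo {
public:
  // Whole-function and per-block gating.
  bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
                                   bool OutlineFromLinkOnceODRs) const override;
  unsigned getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const override;

  // Per-instruction classification: Legal, LegalTerminator, Illegal, Invisible.
  outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT,
                                       unsigned Flags) const override;

  // Cost model: price a repeated sequence, filling in call and frame overhead.
  outliner::OutlinedFunction getOutliningCandidateInfo(
      std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;

  // Codegen for the outlined body and for each call site.
  void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
                          const outliner::OutlinedFunction &OF) const override;
  MachineBasicBlock::iterator
  insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator &It, MachineFunction &MF,
                     const outliner::Candidate &C) const override;
};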
@@ -56,18 +55,22 @@ /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf /// //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineOutliner.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Mangler.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <functional> @@ -80,121 +83,23 @@ using namespace llvm; using namespace ore; +using namespace outliner; STATISTIC(NumOutlined, "Number of candidates outlined"); STATISTIC(FunctionsCreated, "Number of functions created"); -namespace { - -/// \brief An individual sequence of instructions to be replaced with a call to -/// an outlined function. -struct Candidate { -private: - /// The start index of this \p Candidate in the instruction list. - unsigned StartIdx; - - /// The number of instructions in this \p Candidate. - unsigned Len; - -public: - /// Set to false if the candidate overlapped with another candidate. - bool InCandidateList = true; - - /// \brief The index of this \p Candidate's \p OutlinedFunction in the list of - /// \p OutlinedFunctions. - unsigned FunctionIdx; - - /// Contains all target-specific information for this \p Candidate. - TargetInstrInfo::MachineOutlinerInfo MInfo; - - /// Return the number of instructions in this Candidate. - unsigned getLength() const { return Len; } - - /// Return the start index of this candidate. - unsigned getStartIdx() const { return StartIdx; } - - // Return the end index of this candidate. - unsigned getEndIdx() const { return StartIdx + Len - 1; } - - /// \brief The number of instructions that would be saved by outlining every - /// candidate of this type. - /// - /// This is a fixed value which is not updated during the candidate pruning - /// process. It is only used for deciding which candidate to keep if two - /// candidates overlap. The true benefit is stored in the OutlinedFunction - /// for some given candidate. - unsigned Benefit = 0; - - Candidate(unsigned StartIdx, unsigned Len, unsigned FunctionIdx) - : StartIdx(StartIdx), Len(Len), FunctionIdx(FunctionIdx) {} - - Candidate() {} - - /// \brief Used to ensure that \p Candidates are outlined in an order that - /// preserves the start and end indices of other \p Candidates. - bool operator<(const Candidate &RHS) const { - return getStartIdx() > RHS.getStartIdx(); - } -}; - -/// \brief The information necessary to create an outlined function for some -/// class of candidate. -struct OutlinedFunction { - -private: - /// The number of candidates for this \p OutlinedFunction. - unsigned OccurrenceCount = 0; - -public: - std::vector<std::shared_ptr<Candidate>> Candidates; - - /// The actual outlined function created. - /// This is initialized after we go through and create the actual function. - MachineFunction *MF = nullptr; - - /// A number assigned to this function which appears at the end of its name. 
- unsigned Name; +// Set to true if the user wants the outliner to run on linkonceodr linkage +// functions. This is false by default because the linker can dedupe linkonceodr +// functions. Since the outliner is confined to a single module (modulo LTO), +// this is off by default. It should, however, be the default behaviour in +// LTO. +static cl::opt<bool> EnableLinkOnceODROutlining( + "enable-linkonceodr-outlining", + cl::Hidden, + cl::desc("Enable the machine outliner on linkonceodr functions"), + cl::init(false)); - /// \brief The sequence of integers corresponding to the instructions in this - /// function. - std::vector<unsigned> Sequence; - - /// Contains all target-specific information for this \p OutlinedFunction. - TargetInstrInfo::MachineOutlinerInfo MInfo; - - /// Return the number of candidates for this \p OutlinedFunction. - unsigned getOccurrenceCount() { return OccurrenceCount; } - - /// Decrement the occurrence count of this OutlinedFunction and return the - /// new count. - unsigned decrement() { - assert(OccurrenceCount > 0 && "Can't decrement an empty function!"); - OccurrenceCount--; - return getOccurrenceCount(); - } - - /// \brief Return the number of instructions it would take to outline this - /// function. - unsigned getOutliningCost() { - return (OccurrenceCount * MInfo.CallOverhead) + Sequence.size() + - MInfo.FrameOverhead; - } - - /// \brief Return the number of instructions that would be saved by outlining - /// this function. - unsigned getBenefit() { - unsigned NotOutlinedCost = OccurrenceCount * Sequence.size(); - unsigned OutlinedCost = getOutliningCost(); - return (NotOutlinedCost < OutlinedCost) ? 0 - : NotOutlinedCost - OutlinedCost; - } - - OutlinedFunction(unsigned Name, unsigned OccurrenceCount, - const std::vector<unsigned> &Sequence, - TargetInstrInfo::MachineOutlinerInfo &MInfo) - : OccurrenceCount(OccurrenceCount), Name(Name), Sequence(Sequence), - MInfo(MInfo) {} -}; +namespace { /// Represents an undefined index in the suffix tree. const unsigned EmptyIdx = -1; @@ -242,7 +147,7 @@ struct SuffixTreeNode { /// For all other nodes, this is ignored. unsigned SuffixIdx = EmptyIdx; - /// \brief For internal nodes, a pointer to the internal node representing + /// For internal nodes, a pointer to the internal node representing /// the same sequence with the first character chopped off. /// /// This acts as a shortcut in Ukkonen's algorithm. One of the things that @@ -356,7 +261,7 @@ private: /// The end index of each leaf in the tree. unsigned LeafEndIdx = -1; - /// \brief Helper struct which keeps track of the next insertion point in + /// Helper struct which keeps track of the next insertion point in /// Ukkonen's algorithm. struct ActiveState { /// The next node to insert at. @@ -369,7 +274,7 @@ private: unsigned Len = 0; }; - /// \brief The point the next insertion will take place at in the + /// The point the next insertion will take place at in the /// construction algorithm. ActiveState Active; @@ -416,7 +321,7 @@ private: return N; } - /// \brief Set the suffix indices of the leaves to the start indices of their + /// Set the suffix indices of the leaves to the start indices of their /// respective suffixes. Also stores each leaf in \p LeafVector at its /// respective suffix index. /// @@ -454,7 +359,7 @@ private: } } - /// \brief Construct the suffix tree for the prefix of the input ending at + /// Construct the suffix tree for the prefix of the input ending at /// \p EndIdx. /// /// Used to construct the full suffix tree iteratively. 
At the end of each @@ -615,16 +520,16 @@ public: } }; -/// \brief Maps \p MachineInstrs to unsigned integers and stores the mappings. +/// Maps \p MachineInstrs to unsigned integers and stores the mappings. struct InstructionMapper { - /// \brief The next available integer to assign to a \p MachineInstr that + /// The next available integer to assign to a \p MachineInstr that /// cannot be outlined. /// /// Set to -3 for compatability with \p DenseMapInfo<unsigned>. unsigned IllegalInstrNumber = -3; - /// \brief The next available integer to assign to a \p MachineInstr that can + /// The next available integer to assign to a \p MachineInstr that can /// be outlined. unsigned LegalInstrNumber = 0; @@ -639,11 +544,11 @@ struct InstructionMapper { /// The vector of unsigned integers that the module is mapped to. std::vector<unsigned> UnsignedVec; - /// \brief Stores the location of the instruction associated with the integer + /// Stores the location of the instruction associated with the integer /// at index i in \p UnsignedVec for each index i. std::vector<MachineBasicBlock::iterator> InstrList; - /// \brief Maps \p *It to a legal integer. + /// Maps \p *It to a legal integer. /// /// Updates \p InstrList, \p UnsignedVec, \p InstructionIntegerMap, /// \p IntegerInstructionMap, and \p LegalInstrNumber. @@ -706,7 +611,7 @@ struct InstructionMapper { return MINumber; } - /// \brief Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds + /// Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds /// and appends it to \p UnsignedVec and \p InstrList. /// /// Two instructions are assigned the same integer if they are identical. @@ -720,20 +625,29 @@ struct InstructionMapper { void convertToUnsignedVec(MachineBasicBlock &MBB, const TargetRegisterInfo &TRI, const TargetInstrInfo &TII) { + unsigned Flags = TII.getMachineOutlinerMBBFlags(MBB); + for (MachineBasicBlock::iterator It = MBB.begin(), Et = MBB.end(); It != Et; It++) { // Keep track of where this instruction is in the module. - switch (TII.getOutliningType(*It)) { - case TargetInstrInfo::MachineOutlinerInstrType::Illegal: + switch (TII.getOutliningType(It, Flags)) { + case InstrType::Illegal: mapToIllegalUnsigned(It); break; - case TargetInstrInfo::MachineOutlinerInstrType::Legal: + case InstrType::Legal: mapToLegalUnsigned(It); break; - case TargetInstrInfo::MachineOutlinerInstrType::Invisible: + case InstrType::LegalTerminator: + mapToLegalUnsigned(It); + InstrList.push_back(It); + UnsignedVec.push_back(IllegalInstrNumber); + IllegalInstrNumber--; + break; + + case InstrType::Invisible: break; } } @@ -757,7 +671,7 @@ struct InstructionMapper { } }; -/// \brief An interprocedural pass which finds repeated sequences of +/// An interprocedural pass which finds repeated sequences of /// instructions and replaces them with calls to functions. /// /// Each instruction is mapped to an unsigned integer and placed in a string. @@ -770,10 +684,19 @@ struct MachineOutliner : public ModulePass { static char ID; - /// \brief Set to true if the outliner should consider functions with + /// Set to true if the outliner should consider functions with /// linkonceodr linkage. bool OutlineFromLinkOnceODRs = false; + /// Set to true if the outliner should run on all functions in the module + /// considered safe for outlining. + /// Set to true by default for compatibility with llc's -run-pass option. + /// Set when the pass is constructed in TargetPassConfig. 
+ bool RunOnAllFunctions = true; + + // Collection of IR functions created by the outliner. + std::vector<Function *> CreatedIRFunctions; + StringRef getPassName() const override { return "Machine Outliner"; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -783,27 +706,35 @@ struct MachineOutliner : public ModulePass { ModulePass::getAnalysisUsage(AU); } - MachineOutliner(bool OutlineFromLinkOnceODRs = false) - : ModulePass(ID), OutlineFromLinkOnceODRs(OutlineFromLinkOnceODRs) { + MachineOutliner() : ModulePass(ID) { initializeMachineOutlinerPass(*PassRegistry::getPassRegistry()); } + /// Remark output explaining that not outlining a set of candidates would be + /// better than outlining that set. + void emitNotOutliningCheaperRemark( + unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq, + OutlinedFunction &OF); + + /// Remark output explaining that a function was outlined. + void emitOutlinedFunctionRemark(OutlinedFunction &OF); + /// Find all repeated substrings that satisfy the outlining cost model. /// /// If a substring appears at least twice, then it must be represented by - /// an internal node which appears in at least two suffixes. Each suffix is - /// represented by a leaf node. To do this, we visit each internal node in - /// the tree, using the leaf children of each internal node. If an internal - /// node represents a beneficial substring, then we use each of its leaf - /// children to find the locations of its substring. + /// an internal node which appears in at least two suffixes. Each suffix + /// is represented by a leaf node. To do this, we visit each internal node + /// in the tree, using the leaf children of each internal node. If an + /// internal node represents a beneficial substring, then we use each of + /// its leaf children to find the locations of its substring. /// /// \param ST A suffix tree to query. /// \param TII TargetInstrInfo for the target. /// \param Mapper Contains outlining mapping information. /// \param[out] CandidateList Filled with candidates representing each /// beneficial substring. - /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions each - /// type of candidate. + /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions + /// each type of candidate. /// /// \returns The length of the longest candidate found. unsigned @@ -812,7 +743,7 @@ struct MachineOutliner : public ModulePass { std::vector<std::shared_ptr<Candidate>> &CandidateList, std::vector<OutlinedFunction> &FunctionList); - /// \brief Replace the sequences of instructions represented by the + /// Replace the sequences of instructions represented by the /// \p Candidates in \p CandidateList with calls to \p MachineFunctions /// described in \p FunctionList. /// @@ -852,7 +783,7 @@ struct MachineOutliner : public ModulePass { /// Removes \p C from the candidate list, and updates its \p OutlinedFunction. void prune(Candidate &C, std::vector<OutlinedFunction> &FunctionList); - /// \brief Remove any overlapping candidates that weren't handled by the + /// Remove any overlapping candidates that weren't handled by the /// suffix tree's pruning method. /// /// Pruning from the suffix tree doesn't necessarily remove all overlaps. @@ -873,6 +804,16 @@ struct MachineOutliner : public ModulePass { /// Construct a suffix tree on the instructions in \p M and outline repeated /// strings from that tree. bool runOnModule(Module &M) override; + + /// Return a DISubprogram for OF if one exists, and null otherwise. 
Helper + /// function for remark emission. + DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) { + DISubprogram *SP; + for (const std::shared_ptr<Candidate> &C : OF.Candidates) + if (C && C->getMF() && (SP = C->getMF()->getFunction().getSubprogram())) + return SP; + return nullptr; + } }; } // Anonymous namespace. @@ -880,8 +821,10 @@ struct MachineOutliner : public ModulePass { char MachineOutliner::ID = 0; namespace llvm { -ModulePass *createMachineOutlinerPass(bool OutlineFromLinkOnceODRs) { - return new MachineOutliner(OutlineFromLinkOnceODRs); +ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions) { + MachineOutliner *OL = new MachineOutliner(); + OL->RunOnAllFunctions = RunOnAllFunctions; + return OL; } } // namespace llvm @@ -889,6 +832,65 @@ ModulePass *createMachineOutlinerPass(bool OutlineFromLinkOnceODRs) { INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false, false) +void MachineOutliner::emitNotOutliningCheaperRemark( + unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq, + OutlinedFunction &OF) { + Candidate &C = CandidatesForRepeatedSeq.front(); + MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr); + MORE.emit([&]() { + MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper", + C.front()->getDebugLoc(), C.getMBB()); + R << "Did not outline " << NV("Length", StringLen) << " instructions" + << " from " << NV("NumOccurrences", CandidatesForRepeatedSeq.size()) + << " locations." + << " Bytes from outlining all occurrences (" + << NV("OutliningCost", OF.getOutliningCost()) << ")" + << " >= Unoutlined instruction bytes (" + << NV("NotOutliningCost", OF.getNotOutlinedCost()) << ")" + << " (Also found at: "; + + // Tell the user the other places the candidate was found. + for (unsigned i = 1, e = CandidatesForRepeatedSeq.size(); i < e; i++) { + R << NV((Twine("OtherStartLoc") + Twine(i)).str(), + CandidatesForRepeatedSeq[i].front()->getDebugLoc()); + if (i != e - 1) + R << ", "; + } + + R << ")"; + return R; + }); +} + +void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) { + MachineBasicBlock *MBB = &*OF.MF->begin(); + MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr); + MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction", + MBB->findDebugLoc(MBB->begin()), MBB); + R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) << " bytes by " + << "outlining " << NV("Length", OF.Sequence.size()) << " instructions " + << "from " << NV("NumOccurrences", OF.getOccurrenceCount()) + << " locations. " + << "(Found at: "; + + // Tell the user the other places the candidate was found. + for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) { + + // Skip over things that were pruned. + if (!OF.Candidates[i]->InCandidateList) + continue; + + R << NV((Twine("StartLoc") + Twine(i)).str(), + OF.Candidates[i]->front()->getDebugLoc()); + if (i != e - 1) + R << ", "; + } + + R << ")"; + + MORE.emit(R); +} + unsigned MachineOutliner::findCandidates( SuffixTree &ST, const TargetInstrInfo &TII, InstructionMapper &Mapper, std::vector<std::shared_ptr<Candidate>> &CandidateList, @@ -923,14 +925,6 @@ unsigned MachineOutliner::findCandidates( // this vector. std::vector<Candidate> CandidatesForRepeatedSeq; - // Describes the start and end point of each candidate. This allows the - // target to infer some information about each occurrence of each repeated - // sequence. - // FIXME: CandidatesForRepeatedSeq and this should be combined. 
- std::vector< - std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>> - RepeatedSequenceLocs; - // Figure out the call overhead for each instance of the sequence. for (auto &ChildPair : Parent.Children) { SuffixTreeNode *M = ChildPair.second; @@ -966,17 +960,18 @@ unsigned MachineOutliner::findCandidates( CandidatesForRepeatedSeq.end(), [&StartIdx, &EndIdx](const Candidate &C) { return (EndIdx < C.getStartIdx() || - StartIdx > C.getEndIdx()); + StartIdx > C.getEndIdx()); })) { // It doesn't overlap with anything, so we can outline it. // Each sequence is over [StartIt, EndIt]. + // Save the candidate and its location. + MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx]; MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx]; - // Save the candidate and its location. - CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, + CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt, + EndIt, StartIt->getParent(), FunctionList.size()); - RepeatedSequenceLocs.emplace_back(std::make_pair(StartIt, EndIt)); } } } @@ -984,69 +979,33 @@ unsigned MachineOutliner::findCandidates( // We've found something we might want to outline. // Create an OutlinedFunction to store it and check if it'd be beneficial // to outline. - TargetInstrInfo::MachineOutlinerInfo MInfo = - TII.getOutlininingCandidateInfo(RepeatedSequenceLocs); + OutlinedFunction OF = + TII.getOutliningCandidateInfo(CandidatesForRepeatedSeq); + + // If we deleted every candidate, then there's nothing to outline. + if (OF.Candidates.empty()) + continue; + std::vector<unsigned> Seq; for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++) Seq.push_back(ST.Str[i]); - OutlinedFunction OF(FunctionList.size(), CandidatesForRepeatedSeq.size(), - Seq, MInfo); - unsigned Benefit = OF.getBenefit(); + OF.Sequence = Seq; + OF.Name = FunctionList.size(); // Is it better to outline this candidate than not? - if (Benefit < 1) { - // Outlining this candidate would take more instructions than not - // outlining. - // Emit a remark explaining why we didn't outline this candidate. - std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator> C = - RepeatedSequenceLocs[0]; - MachineOptimizationRemarkEmitter MORE( - *(C.first->getParent()->getParent()), nullptr); - MORE.emit([&]() { - MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper", - C.first->getDebugLoc(), - C.first->getParent()); - R << "Did not outline " << NV("Length", StringLen) << " instructions" - << " from " << NV("NumOccurrences", RepeatedSequenceLocs.size()) - << " locations." - << " Instructions from outlining all occurrences (" - << NV("OutliningCost", OF.getOutliningCost()) << ")" - << " >= Unoutlined instruction count (" - << NV("NotOutliningCost", StringLen * OF.getOccurrenceCount()) << ")" - << " (Also found at: "; - - // Tell the user the other places the candidate was found. - for (unsigned i = 1, e = RepeatedSequenceLocs.size(); i < e; i++) { - R << NV((Twine("OtherStartLoc") + Twine(i)).str(), - RepeatedSequenceLocs[i].first->getDebugLoc()); - if (i != e - 1) - R << ", "; - } - - R << ")"; - return R; - }); - - // Move to the next candidate. + if (OF.getBenefit() < 1) { + emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, OF); continue; } if (StringLen > MaxLen) MaxLen = StringLen; - // At this point, the candidate class is seen as beneficial. Set their - // benefit values and save them in the candidate list. 
- std::vector<std::shared_ptr<Candidate>> CandidatesForFn; - for (Candidate &C : CandidatesForRepeatedSeq) { - C.Benefit = Benefit; - C.MInfo = MInfo; - std::shared_ptr<Candidate> Cptr = std::make_shared<Candidate>(C); - CandidateList.push_back(Cptr); - CandidatesForFn.push_back(Cptr); - } - + // The function is beneficial. Save its candidates to the candidate list + // for pruning. + for (std::shared_ptr<Candidate> &C : OF.Candidates) + CandidateList.push_back(C); FunctionList.push_back(OF); - FunctionList.back().Candidates = CandidatesForFn; // Move to the next function. Parent.IsInTree = false; @@ -1067,11 +1026,11 @@ void MachineOutliner::prune(Candidate &C, // Remove C from the CandidateList. C.InCandidateList = false; - DEBUG(dbgs() << "- Removed a Candidate \n"; - dbgs() << "--- Num fns left for candidate: " << F.getOccurrenceCount() - << "\n"; - dbgs() << "--- Candidate's functions's benefit: " << F.getBenefit() - << "\n";); + LLVM_DEBUG(dbgs() << "- Removed a Candidate \n"; + dbgs() << "--- Num fns left for candidate: " + << F.getOccurrenceCount() << "\n"; + dbgs() << "--- Candidate's functions's benefit: " << F.getBenefit() + << "\n";); } void MachineOutliner::pruneOverlaps( @@ -1119,7 +1078,7 @@ void MachineOutliner::pruneOverlaps( if (C1.getStartIdx() > MaxCandidateLen) FarthestPossibleIdx = C1.getStartIdx() - MaxCandidateLen; - // Compare against the candidates in the list that start at at most + // Compare against the candidates in the list that start at most // FarthestPossibleIdx indices away from C1. There are at most // MaxCandidateLen of these. for (auto Sit = It + 1; Sit != Et; Sit++) { @@ -1205,9 +1164,20 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, // NOTE: If this is linkonceodr, then we can take advantage of linker deduping // which gives us better results when we outline from linkonceodr functions. - F->setLinkage(GlobalValue::PrivateLinkage); + F->setLinkage(GlobalValue::InternalLinkage); F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + // FIXME: Set nounwind, so we don't generate eh_frame? Haven't verified it's + // necessary. + + // Set optsize/minsize, so we don't insert padding between outlined + // functions. + F->addFnAttr(Attribute::OptimizeForSize); + F->addFnAttr(Attribute::MinSize); + + // Save F so that we can add debug info later if we need to. + CreatedIRFunctions.push_back(F); + BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); IRBuilder<> Builder(EntryBB); Builder.CreateRetVoid(); @@ -1221,8 +1191,6 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, // Insert the new function into the module. MF.insert(MF.begin(), &MBB); - TII.insertOutlinerPrologue(MBB, MF, OF.MInfo); - // Copy over the instructions for the function using the integer mappings in // its sequence. for (unsigned Str : OF.Sequence) { @@ -1231,13 +1199,53 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, NewMI->dropMemRefs(); // Don't keep debug information for outlined instructions. - // FIXME: This means outlined functions are currently undebuggable. NewMI->setDebugLoc(DebugLoc()); MBB.insert(MBB.end(), NewMI); } - TII.insertOutlinerEpilogue(MBB, MF, OF.MInfo); + TII.buildOutlinedFrame(MBB, MF, OF); + + // If there's a DISubprogram associated with this outlined function, then + // emit debug info for the outlined function. + if (DISubprogram *SP = getSubprogramOrNull(OF)) { + // We have a DISubprogram. Get its DICompileUnit. 
+ DICompileUnit *CU = SP->getUnit(); + DIBuilder DB(M, true, CU); + DIFile *Unit = SP->getFile(); + Mangler Mg; + + // Walk over each IR function we created in the outliner and create + // DISubprograms for each function. + for (Function *F : CreatedIRFunctions) { + // Get the mangled name of the function for the linkage name. + std::string Dummy; + llvm::raw_string_ostream MangledNameStream(Dummy); + Mg.getNameWithPrefix(MangledNameStream, F, false); + + DISubprogram *SP = DB.createFunction( + Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()), + Unit /* File */, + 0 /* Line 0 is reserved for compiler-generated code. */, + DB.createSubroutineType( + DB.getOrCreateTypeArray(None)), /* void type */ + false, true, 0, /* Line 0 is reserved for compiler-generated code. */ + DINode::DIFlags::FlagArtificial /* Compiler-generated code. */, + true /* Outlined code is optimized code by definition. */); + + // Don't add any new variables to the subprogram. + DB.finalizeSubprogram(SP); + + // Attach subprogram to the function. + F->setSubprogram(SP); + } + + // We're done with the DIBuilder. + DB.finalize(); + } + // Outlined functions shouldn't preserve liveness. + MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness); + MF.getRegInfo().freezeReservedRegs(MF); return &MF; } @@ -1260,79 +1268,73 @@ bool MachineOutliner::outline( if (OF.getBenefit() < 1) continue; - // If not, then outline it. - assert(C.getStartIdx() < Mapper.InstrList.size() && - "Candidate out of bounds!"); - MachineBasicBlock *MBB = (*Mapper.InstrList[C.getStartIdx()]).getParent(); - MachineBasicBlock::iterator StartIt = Mapper.InstrList[C.getStartIdx()]; - unsigned EndIdx = C.getEndIdx(); - - assert(EndIdx < Mapper.InstrList.size() && "Candidate out of bounds!"); - MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx]; - assert(EndIt != MBB->end() && "EndIt out of bounds!"); - - EndIt++; // Erase needs one past the end index. - // Does this candidate have a function yet? if (!OF.MF) { OF.MF = createOutlinedFunction(M, OF, Mapper); - MachineBasicBlock *MBB = &*OF.MF->begin(); - - // Output a remark telling the user that an outlined function was created, - // and explaining where it came from. - MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr); - MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction", - MBB->findDebugLoc(MBB->begin()), MBB); - R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) - << " instructions by " - << "outlining " << NV("Length", OF.Sequence.size()) << " instructions " - << "from " << NV("NumOccurrences", OF.getOccurrenceCount()) - << " locations. " - << "(Found at: "; - - // Tell the user the other places the candidate was found. - for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) { - - // Skip over things that were pruned. 
- if (!OF.Candidates[i]->InCandidateList) - continue; - - R << NV( - (Twine("StartLoc") + Twine(i)).str(), - Mapper.InstrList[OF.Candidates[i]->getStartIdx()]->getDebugLoc()); - if (i != e - 1) - R << ", "; - } - - R << ")"; - - MORE.emit(R); + emitOutlinedFunctionRemark(OF); FunctionsCreated++; } MachineFunction *MF = OF.MF; + MachineBasicBlock &MBB = *C.getMBB(); + MachineBasicBlock::iterator StartIt = C.front(); + MachineBasicBlock::iterator EndIt = C.back(); + assert(StartIt != C.getMBB()->end() && "StartIt out of bounds!"); + assert(EndIt != C.getMBB()->end() && "EndIt out of bounds!"); + const TargetSubtargetInfo &STI = MF->getSubtarget(); const TargetInstrInfo &TII = *STI.getInstrInfo(); // Insert a call to the new function and erase the old sequence. - TII.insertOutlinedCall(M, *MBB, StartIt, *MF, C.MInfo); - StartIt = Mapper.InstrList[C.getStartIdx()]; - MBB->erase(StartIt, EndIt); + auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *OF.MF, C); + + // If the caller tracks liveness, then we need to make sure that anything + // we outline doesn't break liveness assumptions. + // The outlined functions themselves currently don't track liveness, but + // we should make sure that the ranges we yank things out of aren't + // wrong. + if (MBB.getParent()->getProperties().hasProperty( + MachineFunctionProperties::Property::TracksLiveness)) { + // Helper lambda for adding implicit def operands to the call instruction. + auto CopyDefs = [&CallInst](MachineInstr &MI) { + for (MachineOperand &MOP : MI.operands()) { + // Skip over anything that isn't a register. + if (!MOP.isReg()) + continue; + + // If it's a def, add it to the call instruction. + if (MOP.isDef()) + CallInst->addOperand( + MachineOperand::CreateReg(MOP.getReg(), true, /* isDef = true */ + true /* isImp = true */)); + } + }; + + // Copy over the defs in the outlined range. + // First inst in outlined range <-- Anything that's defined in this + // ... .. range has to be added as an implicit + // Last inst in outlined range <-- def to the call instruction. + std::for_each(CallInst, std::next(EndIt), CopyDefs); + } + // Erase from the point after where the call was inserted up to, and + // including, the final instruction in the sequence. + // Erase needs one past the end, so we need std::next there too. + MBB.erase(std::next(StartIt), std::next(EndIt)); OutlinedSomething = true; // Statistics. NumOutlined++; } - DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";); + LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";); return OutlinedSomething; } bool MachineOutliner::runOnModule(Module &M) { - - // Is there anything in the module at all? + // Check if there's anything in the module. If it's empty, then there's + // nothing to outline. if (M.empty()) return false; @@ -1342,25 +1344,67 @@ bool MachineOutliner::runOnModule(Module &M) { const TargetRegisterInfo *TRI = STI.getRegisterInfo(); const TargetInstrInfo *TII = STI.getInstrInfo(); + // If the user passed -enable-machine-outliner=always or + // -enable-machine-outliner, the pass will run on all functions in the module. + // Otherwise, if the target supports default outlining, it will run on all + // functions deemed by the target to be worth outlining from by default. Tell + // the user how the outliner is running. 
+ LLVM_DEBUG( + dbgs() << "Machine Outliner: Running on "; + if (RunOnAllFunctions) + dbgs() << "all functions"; + else + dbgs() << "target-default functions"; + dbgs() << "\n" + ); + + // If the user specifies that they want to outline from linkonceodrs, set + // it here. + OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining; + InstructionMapper Mapper; - // Build instruction mappings for each function in the module. + // Build instruction mappings for each function in the module. Start by + // iterating over each Function in M. for (Function &F : M) { - MachineFunction &MF = MMI.getOrCreateMachineFunction(F); - // Is the function empty? Safe to outline from? - if (F.empty() || - !TII->isFunctionSafeToOutlineFrom(MF, OutlineFromLinkOnceODRs)) + // If there's nothing in F, then there's no reason to try and outline from + // it. + if (F.empty()) + continue; + + // There's something in F. Check if it has a MachineFunction associated with + // it. + MachineFunction *MF = MMI.getMachineFunction(F); + + // If it doesn't, then there's nothing to outline from. Move to the next + // Function. + if (!MF) + continue; + + if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF)) continue; - // If it is, look at each MachineBasicBlock in the function. - for (MachineBasicBlock &MBB : MF) { + // We have a MachineFunction. Ask the target if it's suitable for outlining. + // If it isn't, then move on to the next Function in the module. + if (!TII->isFunctionSafeToOutlineFrom(*MF, OutlineFromLinkOnceODRs)) + continue; - // Is there anything in MBB? + // We have a function suitable for outlining. Iterate over every + // MachineBasicBlock in MF and try to map its instructions to a list of + // unsigned integers. + for (MachineBasicBlock &MBB : *MF) { + // If there isn't anything in MBB, then there's no point in outlining from + // it. if (MBB.empty()) continue; - // If yes, map it. + // Check if MBB could be the target of an indirect branch. If it is, then + // we don't want to outline from it. + if (MBB.hasAddressTaken()) + continue; + + // MBB is suitable for outlining. Map it to a list of unsigneds. Mapper.convertToUnsignedVec(MBB, *TRI, *TII); } } @@ -1378,5 +1422,7 @@ bool MachineOutliner::runOnModule(Module &M) { pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII); // Outline each of the candidates and return true if something was outlined. - return outline(M, CandidateList, FunctionList, Mapper); + bool OutlinedSomething = outline(M, CandidateList, FunctionList, Mapper); + + return OutlinedSomething; } diff --git a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp index 18cb9af499a6..9bb00aaef86d 100644 --- a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -10,14 +10,14 @@ // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner. // // Software pipelining (SWP) is an instruction scheduling technique for loops -// that overlap loop iterations and explioits ILP via a compiler transformation. +// that overlap loop iterations and exploits ILP via a compiler transformation. // // Swing Modulo Scheduling is an implementation of software pipelining // that generates schedules that are near optimal in terms of initiation // interval, register requirements, and stage count. See the papers: // // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa, -// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Processings of the 1996 +// A. 
Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996 // Conference on Parallel Architectures and Compilation Techiniques. // // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J. @@ -93,6 +93,7 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" @@ -125,6 +126,7 @@ using namespace llvm; STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline"); STATISTIC(NumPipelined, "Number of loops software pipelined"); +STATISTIC(NumNodeOrderIssues, "Number of node order issues found"); /// A command line option to turn software pipelining on or off. static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), @@ -138,7 +140,7 @@ static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size", /// A command line argument to limit minimum initial interval for pipelining. static cl::opt<int> SwpMaxMii("pipeliner-max-mii", - cl::desc("Size limit for the the MII."), + cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27)); /// A command line argument to limit the number of stages in the pipeline. @@ -217,6 +219,7 @@ public: } private: + void preprocessPhiNodes(MachineBasicBlock &B); bool canPipelineLoop(MachineLoop &L); bool scheduleLoop(MachineLoop &L); bool swingModuloScheduler(MachineLoop &L); @@ -241,6 +244,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs { struct NodeInfo { int ASAP = 0; int ALAP = 0; + int ZeroLatencyDepth = 0; + int ZeroLatencyHeight = 0; NodeInfo() = default; }; @@ -313,15 +318,27 @@ public: /// Return the latest time an instruction my be scheduled. int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; } - /// The mobility function, which the the number of slots in which + /// The mobility function, which the number of slots in which /// an instruction may be scheduled. int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); } /// The depth, in the dependence graph, for a node. - int getDepth(SUnit *Node) { return Node->getDepth(); } + unsigned getDepth(SUnit *Node) { return Node->getDepth(); } + + /// The maximum unweighted length of a path from an arbitrary node to the + /// given node in which each edge has latency 0 + int getZeroLatencyDepth(SUnit *Node) { + return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth; + } /// The height, in the dependence graph, for a node. - int getHeight(SUnit *Node) { return Node->getHeight(); } + unsigned getHeight(SUnit *Node) { return Node->getHeight(); } + + /// The maximum unweighted length of a path from the given node to an + /// arbitrary node in which each edge has latency 0 + int getZeroLatencyHeight(SUnit *Node) { + return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight; + } /// Return true if the dependence is a back-edge in the data dependence graph. /// Since the DAG doesn't contain cycles, we represent a cycle in the graph @@ -332,29 +349,7 @@ public: return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI(); } - /// Return true if the dependence is an order dependence between non-Phis. - static bool isOrder(SUnit *Source, const SDep &Dep) { - if (Dep.getKind() != SDep::Order) - return false; - return (!Source->getInstr()->isPHI() && - !Dep.getSUnit()->getInstr()->isPHI()); - } - - bool isLoopCarriedOrder(SUnit *Source, const SDep &Dep, bool isSucc = true); - - /// The latency of the dependence. 
- unsigned getLatency(SUnit *Source, const SDep &Dep) { - // Anti dependences represent recurrences, so use the latency of the - // instruction on the back-edge. - if (Dep.getKind() == SDep::Anti) { - if (Source->getInstr()->isPHI()) - return Dep.getSUnit()->Latency; - if (Dep.getSUnit()->getInstr()->isPHI()) - return Source->Latency; - return Dep.getLatency(); - } - return Dep.getLatency(); - } + bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true); /// The distance function, which indicates that operation V of iteration I /// depends on operations U of iteration I-distance. @@ -404,6 +399,7 @@ private: void addConnectedNodes(SUnit *SU, NodeSet &NewSet, SetVector<SUnit *> &NodesAdded); void computeNodeOrder(NodeSetType &NodeSets); + void checkValidNodeOrder(const NodeSetType &Circuits) const; bool schedulePipeline(SMSchedule &Schedule); void generatePipelinedLoop(SMSchedule &Schedule); void generateProlog(SMSchedule &Schedule, unsigned LastStage, @@ -438,7 +434,7 @@ private: unsigned InstStageNum, SMSchedule &Schedule); void updateInstruction(MachineInstr *NewMI, bool LastDef, - unsigned CurStageNum, unsigned InstStageNum, + unsigned CurStageNum, unsigned InstrStageNum, SMSchedule &Schedule, ValueMapTy *VRMap); MachineInstr *findDefInLoop(unsigned Reg); unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal, @@ -465,15 +461,22 @@ class NodeSet { bool HasRecurrence = false; unsigned RecMII = 0; int MaxMOV = 0; - int MaxDepth = 0; + unsigned MaxDepth = 0; unsigned Colocate = 0; SUnit *ExceedPressure = nullptr; + unsigned Latency = 0; public: using iterator = SetVector<SUnit *>::const_iterator; NodeSet() = default; - NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {} + NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) { + Latency = 0; + for (unsigned i = 0, e = Nodes.size(); i < e; ++i) + for (const SDep &Succ : Nodes[i]->Succs) + if (Nodes.count(Succ.getSUnit())) + Latency += Succ.getLatency(); + } bool insert(SUnit *SU) { return Nodes.insert(SU); } @@ -513,6 +516,10 @@ public: } } + unsigned getLatency() { return Latency; } + + unsigned getMaxDepth() { return MaxDepth; } + void clear() { Nodes.clear(); RecMII = 0; @@ -563,7 +570,7 @@ public: #endif }; -/// This class repesents the scheduled code. The main data structure is a +/// This class represents the scheduled code. The main data structure is a /// map from scheduled cycle to instructions. During scheduling, the /// data structure explicitly represents all stages/iterations. When /// the algorithm finshes, the schedule is collapsed into a single stage, @@ -700,10 +707,10 @@ public: bool isValidSchedule(SwingSchedulerDAG *SSD); void finalizeSchedule(SwingSchedulerDAG *SSD); - bool orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, + void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, std::deque<SUnit *> &Insts); bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi); - bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Inst, + bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO); void print(raw_ostream &os) const; void dump() const; @@ -804,20 +811,41 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { if (!L.getLoopPreheader()) return false; - // If any of the Phis contain subregs, then we can't pipeline - // because we don't know how to maintain subreg information in the - // VMap structure. 
- MachineBasicBlock *MBB = L.getHeader(); - for (MachineBasicBlock::iterator BBI = MBB->instr_begin(), - BBE = MBB->getFirstNonPHI(); - BBI != BBE; ++BBI) - for (unsigned i = 1; i != BBI->getNumOperands(); i += 2) - if (BBI->getOperand(i).getSubReg() != 0) - return false; - + // Remove any subregisters from inputs to phi nodes. + preprocessPhiNodes(*L.getHeader()); return true; } +void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes(); + + for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) { + MachineOperand &DefOp = PI.getOperand(0); + assert(DefOp.getSubReg() == 0); + auto *RC = MRI.getRegClass(DefOp.getReg()); + + for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) { + MachineOperand &RegOp = PI.getOperand(i); + if (RegOp.getSubReg() == 0) + continue; + + // If the operand uses a subregister, replace it with a new register + // without subregisters, and generate a copy to the new register. + unsigned NewReg = MRI.createVirtualRegister(RC); + MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB(); + MachineBasicBlock::iterator At = PredB.getFirstTerminator(); + const DebugLoc &DL = PredB.findDebugLoc(At); + auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg) + .addReg(RegOp.getReg(), getRegState(RegOp), + RegOp.getSubReg()); + Slots.insertMachineInstrInMaps(*Copy); + RegOp.setReg(NewReg); + RegOp.setSubReg(0); + } + } +} + /// The SMS algorithm consists of the following main steps: /// 1. Computation and analysis of the dependence graph. /// 2. Ordering of the nodes (instructions). @@ -858,13 +886,14 @@ void SwingSchedulerDAG::schedule() { Topo.InitDAGTopologicalSorting(); postprocessDAG(); changeDependences(); - DEBUG({ + LLVM_DEBUG({ for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this); }); NodeSetType NodeSets; findCircuits(NodeSets); + NodeSetType Circuits = NodeSets; // Calculate the MII. unsigned ResMII = calculateResMII(); @@ -877,8 +906,8 @@ void SwingSchedulerDAG::schedule() { RecMII = 0; MII = std::max(ResMII, RecMII); - DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII << ", res=" << ResMII - << ")\n"); + LLVM_DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII + << ", res=" << ResMII << ")\n"); // Can't schedule a loop without a valid MII. if (MII == 0) @@ -896,20 +925,20 @@ void SwingSchedulerDAG::schedule() { checkNodeSets(NodeSets); - DEBUG({ + LLVM_DEBUG({ for (auto &I : NodeSets) { dbgs() << " Rec NodeSet "; I.dump(); } }); - std::sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>()); + std::stable_sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>()); groupRemainingNodes(NodeSets); removeDuplicateNodes(NodeSets); - DEBUG({ + LLVM_DEBUG({ for (auto &I : NodeSets) { dbgs() << " NodeSet "; I.dump(); @@ -918,6 +947,9 @@ void SwingSchedulerDAG::schedule() { computeNodeOrder(NodeSets); + // check for node order issues + checkValidNodeOrder(Circuits); + SMSchedule Schedule(Pass.MF); Scheduled = schedulePipeline(Schedule); @@ -972,7 +1004,7 @@ static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { return 0; } -/// Return the Phi register value that comes the the loop block. +/// Return the Phi register value that comes the loop block. 
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) if (Phi.getOperand(i + 1).getMBB() == LoopBB) @@ -1022,6 +1054,13 @@ static void getUnderlyingObjects(MachineInstr *MI, if (!MM->getValue()) return; GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL); + for (Value *V : Objs) { + if (!isIdentifiedObject(V)) { + Objs.clear(); + return; + } + Objs.push_back(V); + } } /// Add a chain edge between a load and store if the store can be an @@ -1030,6 +1069,8 @@ static void getUnderlyingObjects(MachineInstr *MI, /// but that code doesn't create loop carried dependences. void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { MapVector<Value *, SmallVector<SUnit *, 4>> PendingLoads; + Value *UnknownValue = + UndefValue::get(Type::getVoidTy(MF.getFunction().getContext())); for (auto &SU : SUnits) { MachineInstr &MI = *SU.getInstr(); if (isDependenceBarrier(MI, AA)) @@ -1037,6 +1078,8 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { else if (MI.mayLoad()) { SmallVector<Value *, 4> Objs; getUnderlyingObjects(&MI, Objs, MF.getDataLayout()); + if (Objs.empty()) + Objs.push_back(UnknownValue); for (auto V : Objs) { SmallVector<SUnit *, 4> &SUs = PendingLoads[V]; SUs.push_back(&SU); @@ -1044,6 +1087,8 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { } else if (MI.mayStore()) { SmallVector<Value *, 4> Objs; getUnderlyingObjects(&MI, Objs, MF.getDataLayout()); + if (Objs.empty()) + Objs.push_back(UnknownValue); for (auto V : Objs) { MapVector<Value *, SmallVector<SUnit *, 4>>::iterator I = PendingLoads.find(V); @@ -1058,33 +1103,39 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { // offset, then mark the dependence as loop carried potentially. unsigned BaseReg1, BaseReg2; int64_t Offset1, Offset2; - if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) || - !TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) { - SU.addPred(SDep(Load, SDep::Barrier)); - continue; - } - if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) { - assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) && - "What happened to the chain edge?"); - SU.addPred(SDep(Load, SDep::Barrier)); - continue; + if (TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) && + TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) { + if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) { + assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) && + "What happened to the chain edge?"); + SDep Dep(Load, SDep::Barrier); + Dep.setLatency(1); + SU.addPred(Dep); + continue; + } } // Second, the more expensive check that uses alias analysis on the // base registers. If they alias, and the load offset is less than // the store offset, the mark the dependence as loop carried. 
if (!AA) { - SU.addPred(SDep(Load, SDep::Barrier)); + SDep Dep(Load, SDep::Barrier); + Dep.setLatency(1); + SU.addPred(Dep); continue; } MachineMemOperand *MMO1 = *LdMI.memoperands_begin(); MachineMemOperand *MMO2 = *MI.memoperands_begin(); if (!MMO1->getValue() || !MMO2->getValue()) { - SU.addPred(SDep(Load, SDep::Barrier)); + SDep Dep(Load, SDep::Barrier); + Dep.setLatency(1); + SU.addPred(Dep); continue; } if (MMO1->getValue() == MMO2->getValue() && MMO1->getOffset() <= MMO2->getOffset()) { - SU.addPred(SDep(Load, SDep::Barrier)); + SDep Dep(Load, SDep::Barrier); + Dep.setLatency(1); + SU.addPred(Dep); continue; } AliasResult AAResult = AA->alias( @@ -1093,8 +1144,11 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize, MMO2->getAAInfo())); - if (AAResult != NoAlias) - SU.addPred(SDep(Load, SDep::Barrier)); + if (AAResult != NoAlias) { + SDep Dep(Load, SDep::Barrier); + Dep.setLatency(1); + SU.addPred(Dep); + } } } } @@ -1136,6 +1190,7 @@ void SwingSchedulerDAG::updatePhiDependences() { if (SU != nullptr && UseMI->isPHI()) { if (!MI->isPHI()) { SDep Dep(SU, SDep::Anti, Reg); + Dep.setLatency(1); I.addPred(Dep); } else { HasPhiDef = Reg; @@ -1382,7 +1437,7 @@ unsigned SwingSchedulerDAG::calculateResMII() { /// Iterate over each circuit. Compute the delay(c) and distance(c) /// for each circuit. The II needs to satisfy the inequality /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest -/// II that satistifies the inequality, and the RecMII is the maximum +/// II that satisfies the inequality, and the RecMII is the maximum /// of those values. unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) { unsigned RecMII = 0; @@ -1391,7 +1446,7 @@ unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) { if (Nodes.empty()) continue; - unsigned Delay = Nodes.size() - 1; + unsigned Delay = Nodes.getLatency(); unsigned Distance = 1; // ii = ceil(delay / distance) @@ -1437,10 +1492,23 @@ static void swapAntiDependences(std::vector<SUnit> &SUnits) { void SwingSchedulerDAG::Circuits::createAdjacencyStructure( SwingSchedulerDAG *DAG) { BitVector Added(SUnits.size()); + DenseMap<int, int> OutputDeps; for (int i = 0, e = SUnits.size(); i != e; ++i) { Added.reset(); // Add any successor to the adjacency matrix and exclude duplicates. for (auto &SI : SUnits[i].Succs) { + // Only create a back-edge on the first and last nodes of a dependence + // chain. This records any chains and adds them later. + if (SI.getKind() == SDep::Output) { + int N = SI.getSUnit()->NodeNum; + int BackEdge = i; + auto Dep = OutputDeps.find(BackEdge); + if (Dep != OutputDeps.end()) { + BackEdge = Dep->second; + OutputDeps.erase(Dep); + } + OutputDeps[N] = BackEdge; + } // Do not process a boundary node and a back-edge is processed only // if it goes to a Phi. if (SI.getSUnit()->isBoundaryNode() || @@ -1456,7 +1524,7 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure( // adjacency matrix. for (auto &PI : SUnits[i].Preds) { if (!SUnits[i].getInstr()->mayStore() || - !DAG->isLoopCarriedOrder(&SUnits[i], PI, false)) + !DAG->isLoopCarriedDep(&SUnits[i], PI, false)) continue; if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) { int N = PI.getSUnit()->NodeNum; @@ -1467,6 +1535,12 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure( } } } + // Add back-eges in the adjacency matrix for the output dependences. 
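With this change the recurrence-constrained MII is driven by the summed edge latency around each circuit (NodeSet::getLatency()) rather than by the node count. A rough sketch of the ii = ceil(delay / distance) relation noted above, assuming the NodeSet interface from this patch and the usual standard headers; the free function itself is illustrative, not part of the patch:

  // Illustrative only: RecMII is the largest ceil(delay(c) / distance(c))
  // over all recurrence circuits c; the pass currently models distance as 1.
  unsigned computeRecMII(std::vector<NodeSet> &NodeSets) {
    unsigned RecMII = 0;
    for (NodeSet &Nodes : NodeSets) {
      if (Nodes.empty())
        continue;
      unsigned Delay = Nodes.getLatency(); // total latency around the circuit
      unsigned Distance = 1;               // loop-carried distance of the cycle
      unsigned CurMII = (Delay + Distance - 1) / Distance; // ceil(delay/distance)
      RecMII = std::max(RecMII, CurMII);
    }
    return RecMII;
  }

For example, a recurrence whose edges add up to 6 cycles of latency at distance 1 forces II >= 6.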
+ for (auto &OD : OutputDeps) + if (!Added.test(OD.second)) { + AdjK[OD.first].push_back(OD.second); + Added.set(OD.second); + } } /// Identify an elementary circuit in the dependence graph starting at the @@ -1543,7 +1617,7 @@ void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) { } /// Return true for DAG nodes that we ignore when computing the cost functions. -/// We ignore the back-edge recurrence in order to avoid unbounded recurison +/// We ignore the back-edge recurrence in order to avoid unbounded recursion /// in the calculation of the ASAP, ALAP, etc functions. static bool ignoreDependence(const SDep &D, bool isPred) { if (D.isArtificial()) @@ -1560,7 +1634,7 @@ static bool ignoreDependence(const SDep &D, bool isPred) { void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { ScheduleInfo.resize(SUnits.size()); - DEBUG({ + LLVM_DEBUG({ for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(), E = Topo.end(); I != E; ++I) { @@ -1570,49 +1644,59 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { }); int maxASAP = 0; - // Compute ASAP. + // Compute ASAP and ZeroLatencyDepth. for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(), E = Topo.end(); I != E; ++I) { int asap = 0; + int zeroLatencyDepth = 0; SUnit *SU = &SUnits[*I]; for (SUnit::const_pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end(); IP != EP; ++IP) { + SUnit *pred = IP->getSUnit(); + if (IP->getLatency() == 0) + zeroLatencyDepth = + std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1); if (ignoreDependence(*IP, true)) continue; - SUnit *pred = IP->getSUnit(); - asap = std::max(asap, (int)(getASAP(pred) + getLatency(SU, *IP) - + asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() - getDistance(pred, SU, *IP) * MII)); } maxASAP = std::max(maxASAP, asap); ScheduleInfo[*I].ASAP = asap; + ScheduleInfo[*I].ZeroLatencyDepth = zeroLatencyDepth; } - // Compute ALAP and MOV. + // Compute ALAP, ZeroLatencyHeight, and MOV. for (ScheduleDAGTopologicalSort::const_reverse_iterator I = Topo.rbegin(), E = Topo.rend(); I != E; ++I) { int alap = maxASAP; + int zeroLatencyHeight = 0; SUnit *SU = &SUnits[*I]; for (SUnit::const_succ_iterator IS = SU->Succs.begin(), ES = SU->Succs.end(); IS != ES; ++IS) { + SUnit *succ = IS->getSUnit(); + if (IS->getLatency() == 0) + zeroLatencyHeight = + std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1); if (ignoreDependence(*IS, true)) continue; - SUnit *succ = IS->getSUnit(); - alap = std::min(alap, (int)(getALAP(succ) - getLatency(SU, *IS) + + alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() + getDistance(SU, succ, *IS) * MII)); } ScheduleInfo[*I].ALAP = alap; + ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight; } // After computing the node functions, compute the summary for each node set. 
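For reference, the node functions computed above can be restated as the following recurrences, where dist is the loop-carried distance of an edge and back-edges rejected by ignoreDependence are excluded from the ASAP/ALAP terms:

  // ASAP(v) = max over preds u : ASAP(u) + latency(u,v) - dist(u,v) * MII   (0 if v has no preds)
  // ALAP(v) = min over succs w : ALAP(w) - latency(v,w) + dist(v,w) * MII   (maxASAP if none)
  // MOV(v)  = ALAP(v) - ASAP(v)   // scheduling freedom, in cycles
  // ZeroLatencyDepth(v) / ZeroLatencyHeight(v): longest chain of latency-0
  // edges ending at / starting from v, counted over all edges.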
for (NodeSet &I : NodeSets) I.computeNodeSetInfo(this); - DEBUG({ + LLVM_DEBUG({ for (unsigned i = 0; i < SUnits.size(); i++) { dbgs() << "\tNode " << i << ":\n"; dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n"; @@ -1620,6 +1704,8 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n"; dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n"; dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n"; + dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n"; + dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n"; } }); } @@ -1778,7 +1864,8 @@ void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) { RecRPTracker.closeBottom(); std::vector<SUnit *> SUnits(NS.begin(), NS.end()); - std::sort(SUnits.begin(), SUnits.end(), [](const SUnit *A, const SUnit *B) { + llvm::sort(SUnits.begin(), SUnits.end(), + [](const SUnit *A, const SUnit *B) { return A->NodeNum > B->NodeNum; }); @@ -1796,9 +1883,10 @@ void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) { CriticalPSets, RecRegPressure.MaxSetPressure); if (RPDelta.Excess.isValid()) { - DEBUG(dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") " - << TRI->getRegPressureSetName(RPDelta.Excess.getPSet()) - << ":" << RPDelta.Excess.getUnitInc()); + LLVM_DEBUG( + dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") " + << TRI->getRegPressureSetName(RPDelta.Excess.getPSet()) + << ":" << RPDelta.Excess.getUnitInc()); NS.setExceedPressure(SU); break; } @@ -1834,25 +1922,23 @@ void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) { /// Check if the existing node-sets are profitable. If not, then ignore the /// recurrent node-sets, and attempt to schedule all nodes together. This is -/// a heuristic. If the MII is large and there is a non-recurrent node with -/// a large depth compared to the MII, then it's best to try and schedule -/// all instruction together instead of starting with the recurrent node-sets. +/// a heuristic. If the MII is large and all the recurrent node-sets are small, +/// then it's best to try to schedule all instructions together instead of +/// starting with the recurrent node-sets. void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) { // Look for loops with a large MII. - if (MII <= 20) + if (MII < 17) return; // Check if the node-set contains only a simple add recurrence. - for (auto &NS : NodeSets) - if (NS.size() > 2) + for (auto &NS : NodeSets) { + if (NS.getRecMII() > 2) return; - // If the depth of any instruction is significantly larger than the MII, then - // ignore the recurrent node-sets and treat all instructions equally. - for (auto &SU : SUnits) - if (SU.getDepth() > MII * 1.5) { - NodeSets.clear(); - DEBUG(dbgs() << "Clear recurrence node-sets\n"); + if (NS.getMaxDepth() > MII) return; - } + } + NodeSets.clear(); + LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n"); + return; } /// Add the nodes that do not belong to a recurrence set into groups @@ -1907,7 +1993,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { if (!NewSet.empty()) NodeSets.push_back(NewSet); - // Create new nodes sets with the connected nodes any any remaining node that + // Create new nodes sets with the connected nodes any remaining node that // has no predecessor. 
for (unsigned i = 0; i < SUnits.size(); ++i) { SUnit *SU = &SUnits[i]; @@ -1988,14 +2074,6 @@ void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) { } } -/// Return true if Inst1 defines a value that is used in Inst2. -static bool hasDataDependence(SUnit *Inst1, SUnit *Inst2) { - for (auto &SI : Inst1->Succs) - if (SI.getSUnit() == Inst2 && SI.getKind() == SDep::Data) - return true; - return false; -} - /// Compute an ordered list of the dependence graph nodes, which /// indicates the order that the nodes will be scheduled. This is a /// two-level algorithm. First, a partial order is created, which @@ -2005,59 +2083,62 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { NodeOrder.clear(); for (auto &Nodes : NodeSets) { - DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n"); + LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n"); OrderKind Order; SmallSetVector<SUnit *, 8> N; if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) { R.insert(N.begin(), N.end()); Order = BottomUp; - DEBUG(dbgs() << " Bottom up (preds) "); + LLVM_DEBUG(dbgs() << " Bottom up (preds) "); } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) { R.insert(N.begin(), N.end()); Order = TopDown; - DEBUG(dbgs() << " Top down (succs) "); + LLVM_DEBUG(dbgs() << " Top down (succs) "); } else if (isIntersect(N, Nodes, R)) { // If some of the successors are in the existing node-set, then use the // top-down ordering. Order = TopDown; - DEBUG(dbgs() << " Top down (intersect) "); + LLVM_DEBUG(dbgs() << " Top down (intersect) "); } else if (NodeSets.size() == 1) { for (auto &N : Nodes) if (N->Succs.size() == 0) R.insert(N); Order = BottomUp; - DEBUG(dbgs() << " Bottom up (all) "); + LLVM_DEBUG(dbgs() << " Bottom up (all) "); } else { // Find the node with the highest ASAP. SUnit *maxASAP = nullptr; for (SUnit *SU : Nodes) { - if (maxASAP == nullptr || getASAP(SU) >= getASAP(maxASAP)) + if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) || + (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum)) maxASAP = SU; } R.insert(maxASAP); Order = BottomUp; - DEBUG(dbgs() << " Bottom up (default) "); + LLVM_DEBUG(dbgs() << " Bottom up (default) "); } while (!R.empty()) { if (Order == TopDown) { // Choose the node with the maximum height. If more than one, choose - // the node with the lowest MOV. If still more than one, check if there - // is a dependence between the instructions. + // the node with the maximum ZeroLatencyHeight. If still more than one, + // choose the node with the lowest MOV.
while (!R.empty()) { SUnit *maxHeight = nullptr; for (SUnit *I : R) { if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight)) maxHeight = I; else if (getHeight(I) == getHeight(maxHeight) && - getMOV(I) < getMOV(maxHeight) && - !hasDataDependence(maxHeight, I)) + getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight)) maxHeight = I; - else if (hasDataDependence(I, maxHeight)) + else if (getHeight(I) == getHeight(maxHeight) && + getZeroLatencyHeight(I) == + getZeroLatencyHeight(maxHeight) && + getMOV(I) < getMOV(maxHeight)) maxHeight = I; } NodeOrder.insert(maxHeight); - DEBUG(dbgs() << maxHeight->NodeNum << " "); + LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " "); R.remove(maxHeight); for (const auto &I : maxHeight->Succs) { if (Nodes.count(I.getSUnit()) == 0) @@ -2080,28 +2161,29 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { } } Order = BottomUp; - DEBUG(dbgs() << "\n Switching order to bottom up "); + LLVM_DEBUG(dbgs() << "\n Switching order to bottom up "); SmallSetVector<SUnit *, 8> N; if (pred_L(NodeOrder, N, &Nodes)) R.insert(N.begin(), N.end()); } else { // Choose the node with the maximum depth. If more than one, choose - // the node with the lowest MOV. If there is still more than one, check - // for a dependence between the instructions. + // the node with the maximum ZeroLatencyDepth. If still more than one, + // choose the node with the lowest MOV. while (!R.empty()) { SUnit *maxDepth = nullptr; for (SUnit *I : R) { if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth)) maxDepth = I; else if (getDepth(I) == getDepth(maxDepth) && - getMOV(I) < getMOV(maxDepth) && - !hasDataDependence(I, maxDepth)) + getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth)) maxDepth = I; - else if (hasDataDependence(maxDepth, I)) + else if (getDepth(I) == getDepth(maxDepth) && + getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) && + getMOV(I) < getMOV(maxDepth)) maxDepth = I; } NodeOrder.insert(maxDepth); - DEBUG(dbgs() << maxDepth->NodeNum << " "); + LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " "); R.remove(maxDepth); if (Nodes.isExceedSU(maxDepth)) { Order = TopDown; @@ -2114,8 +2196,6 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { continue; if (NodeOrder.count(I.getSUnit()) != 0) continue; - if (I.getKind() == SDep::Anti) - continue; R.insert(I.getSUnit()); } // Back-edges are predecessors with an anti-dependence. 
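The top-down selection loop above amounts to a three-level tie break. An illustrative comparator expressing the same preference, assuming the SwingSchedulerDAG accessors shown earlier:

  // Illustrative only: true if A should be picked before B in top-down order.
  auto TopDownBefore = [this](SUnit *A, SUnit *B) {
    if (getHeight(A) != getHeight(B))
      return getHeight(A) > getHeight(B);                       // greatest height first
    if (getZeroLatencyHeight(A) != getZeroLatencyHeight(B))
      return getZeroLatencyHeight(A) > getZeroLatencyHeight(B); // longest 0-latency chain next
    return getMOV(A) < getMOV(B);                               // least mobility last
  };

The bottom-up direction mirrors this with depth and zero-latency depth in place of the height measures.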
@@ -2130,16 +2210,16 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { } } Order = TopDown; - DEBUG(dbgs() << "\n Switching order to top down "); + LLVM_DEBUG(dbgs() << "\n Switching order to top down "); SmallSetVector<SUnit *, 8> N; if (succ_L(NodeOrder, N, &Nodes)) R.insert(N.begin(), N.end()); } } - DEBUG(dbgs() << "\nDone with Nodeset\n"); + LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n"); } - DEBUG({ + LLVM_DEBUG({ dbgs() << "Node order: "; for (SUnit *I : NodeOrder) dbgs() << " " << I->NodeNum << " "; @@ -2158,7 +2238,7 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) { Schedule.reset(); Schedule.setInitiationInterval(II); - DEBUG(dbgs() << "Try to schedule with " << II << "\n"); + LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n"); SetVector<SUnit *>::iterator NI = NodeOrder.begin(); SetVector<SUnit *>::iterator NE = NodeOrder.end(); @@ -2175,12 +2255,12 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { int SchedStart = INT_MIN; Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart, II, this); - DEBUG({ + LLVM_DEBUG({ dbgs() << "Inst (" << SU->NodeNum << ") "; SU->getInstr()->dump(); dbgs() << "\n"; }); - DEBUG({ + LLVM_DEBUG({ dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart << " me: " << SchedEnd << " ms: " << SchedStart << "\n"; }); @@ -2216,7 +2296,7 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { Schedule.getMaxStageCount() > (unsigned)SwpMaxStages) scheduleFound = false; - DEBUG({ + LLVM_DEBUG({ if (!scheduleFound) dbgs() << "\tCan't schedule\n"; }); @@ -2227,7 +2307,7 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { scheduleFound = Schedule.isValidSchedule(this); } - DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n"); + LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n"); if (scheduleFound) Schedule.finalizeSchedule(this); @@ -2250,7 +2330,7 @@ void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) { // Remember the registers that are used in different stages. The index is // the iteration, or stage, that the instruction is scheduled in. This is - // a map between register names in the orignal block and the names created + // a map between register names in the original block and the names created // in each stage of the pipelined loop. ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2]; InstrMapTy InstrMap; @@ -2297,7 +2377,7 @@ void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) { generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap, InstrMap, MaxStageCount, MaxStageCount, false); - DEBUG(dbgs() << "New block\n"; KernelBB->dump();); + LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump();); SmallVector<MachineBasicBlock *, 4> EpilogBBs; // Generate the epilog instructions to complete the pipeline. @@ -2315,6 +2395,8 @@ void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) { addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap); // Remove the original loop since it's no longer referenced. 
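schedulePipeline above only searches a small window of candidate initiation intervals. In outline, the strategy is the following, where the placement helper is hypothetical and stands in for the per-node computeStart/insert logic defined elsewhere in this file:

  // Illustrative only: grow II from the lower bound until every node in the
  // precomputed order can be placed, giving up after a fixed number of tries.
  bool Scheduled = false;
  for (unsigned II = MII; II < MII + 10 && !Scheduled; ++II) {
    Schedule.reset();
    Schedule.setInitiationInterval(II);
    Scheduled = tryToPlaceAllNodes(Schedule, NodeOrder); // hypothetical helper
  }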
+ for (auto &I : *BB) + LIS.RemoveMachineInstrFromMaps(I); BB->clear(); BB->eraseFromParent(); @@ -2364,7 +2446,7 @@ void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage, } } rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap); - DEBUG({ + LLVM_DEBUG({ dbgs() << "prolog:\n"; NewBB->dump(); }); @@ -2431,7 +2513,9 @@ void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage, continue; MachineInstr *In = &BBI; if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) { - MachineInstr *NewMI = cloneInstr(In, EpilogStage - LastStage, 0); + // Instructions with memoperands in the epilog are updated with + // conservative values. + MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0); updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap); NewBB->push_back(NewMI); InstrMap[NewMI] = In; @@ -2444,7 +2528,7 @@ void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage, InstrMap, LastStage, EpilogStage, i == 1); PredBB = NewBB; - DEBUG({ + LLVM_DEBUG({ dbgs() << "epilog:\n"; NewBB->dump(); }); @@ -2550,24 +2634,20 @@ void SwingSchedulerDAG::generateExistingPhis( // of the Phi value. unsigned NewReg = VRMap[PrevStage][LoopVal]; rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI, - Def, NewReg); + Def, InitVal, NewReg); if (VRMap[CurStageNum].count(LoopVal)) VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal]; } // Adjust the number of Phis needed depending on the number of prologs left, - // and the distance from where the Phi is first scheduled. - unsigned NumPhis = NumStages; - if (!InKernel && (int)PrologStage < LoopValStage) - // The NumPhis is the maximum number of new Phis needed during the steady - // state. If the Phi has not been scheduled in current prolog, then we - // need to generate less Phis. - NumPhis = std::max((int)NumPhis - (int)(LoopValStage - PrologStage), 1); - // The number of Phis cannot exceed the number of prolog stages. Each - // stage can potentially define two values. - NumPhis = std::min(NumPhis, PrologStage + 2); + // and the distance from where the Phi is first scheduled. The number of + // Phis cannot exceed the number of prolog stages. Each stage can + // potentially define two values. + unsigned MaxPhis = PrologStage + 2; + if (!InKernel && (int)PrologStage <= LoopValStage) + MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1); + unsigned NumPhis = std::min(NumStages, MaxPhis); unsigned NewReg = 0; - unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled; // In the epilog, we may need to look back one stage to get the correct // Phi name because the epilog and prolog blocks execute the same stage. @@ -2659,19 +2739,20 @@ void SwingSchedulerDAG::generateExistingPhis( // references another Phi, and the other Phi is scheduled in an // earlier stage. We can try to reuse an existing Phi up until the last // stage of the current Phi. - if (LoopDefIsPhi && (int)PrologStage >= StageScheduled) { + if (LoopDefIsPhi && (int)(PrologStage - np) >= StageScheduled) { int LVNumStages = Schedule.getStagesForPhi(LoopVal); int StageDiff = (StageScheduled - LoopValStage); LVNumStages -= StageDiff; - if (LVNumStages > (int)np) { + // Make sure the loop value Phi has been processed already. + if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) { NewReg = PhiOp2; unsigned ReuseStage = CurStageNum; if (Schedule.isLoopCarried(this, *PhiInst)) ReuseStage -= LVNumStages; // Check if the Phi to reuse has been generated yet. 
If not, then // there is nothing to reuse. - if (VRMap[ReuseStage].count(LoopVal)) { - NewReg = VRMap[ReuseStage][LoopVal]; + if (VRMap[ReuseStage - np].count(LoopVal)) { + NewReg = VRMap[ReuseStage - np][LoopVal]; rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI, Def, NewReg); @@ -2744,7 +2825,7 @@ void SwingSchedulerDAG::generateExistingPhis( /// Generate Phis for the specified block in the generated pipelined code. /// These are new Phis needed because the definition is scheduled after the -/// use in the pipelened sequence. +/// use in the pipelined sequence. void SwingSchedulerDAG::generatePhis( MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap, @@ -2874,6 +2955,13 @@ void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB, if (!MOI->isReg() || !MOI->isDef()) continue; unsigned reg = MOI->getReg(); + // Assume physical registers are used, unless they are marked dead. + if (TargetRegisterInfo::isPhysicalRegister(reg)) { + used = !MOI->isDead(); + if (used) + break; + continue; + } unsigned realUses = 0; for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg), EI = MRI.use_end(); @@ -2891,6 +2979,7 @@ void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB, used = false; } if (!used) { + LIS.RemoveMachineInstrFromMaps(*MI); MI++->eraseFromParent(); continue; } @@ -2905,6 +2994,7 @@ void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB, ++BBI; unsigned reg = MI->getOperand(0).getReg(); if (MRI.use_begin(reg) == MRI.use_end()) { + LIS.RemoveMachineInstrFromMaps(*MI); MI->eraseFromParent(); } } @@ -2924,10 +3014,8 @@ void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs, SMSchedule &Schedule) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(), - BBF = KernelBB->getFirstNonPHI(); - BBI != BBF; ++BBI) { - unsigned Def = BBI->getOperand(0).getReg(); + for (auto &PHI : KernelBB->phis()) { + unsigned Def = PHI.getOperand(0).getReg(); // Check for any Phi definition that used as an operand of another Phi // in the same block. for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def), @@ -2935,7 +3023,7 @@ void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB, I != E; ++I) { if (I->isPHI() && I->getParent() == KernelBB) { // Get the loop carried definition. 
- unsigned LCDef = getLoopPhiReg(*BBI, KernelBB); + unsigned LCDef = getLoopPhiReg(PHI, KernelBB); if (!LCDef) continue; MachineInstr *MI = MRI.getVRegDef(LCDef); @@ -3099,12 +3187,14 @@ void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI, continue; } unsigned Delta; - if (computeDelta(OldMI, Delta)) { + if (Num != UINT_MAX && computeDelta(OldMI, Delta)) { int64_t AdjOffset = Delta * Num; NewMemRefs[Refs++] = MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()); - } else - NewMemRefs[Refs++] = MF.getMachineMemOperand(MMO, 0, UINT64_MAX); + } else { + NewMI.dropMemRefs(); + return; + } } NewMI.setMemRefs(NewMemRefs, NewMemRefs + NumRefs); } @@ -3249,13 +3339,11 @@ void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB, SMSchedule &Schedule, ValueMapTy *VRMap, InstrMapTy &InstrMap) { - for (MachineBasicBlock::iterator BBI = BB->instr_begin(), - BBE = BB->getFirstNonPHI(); - BBI != BBE; ++BBI) { + for (auto &PHI : BB->phis()) { unsigned InitVal = 0; unsigned LoopVal = 0; - getPhiRegs(*BBI, BB, InitVal, LoopVal); - unsigned PhiDef = BBI->getOperand(0).getReg(); + getPhiRegs(PHI, BB, InitVal, LoopVal); + unsigned PhiDef = PHI.getOperand(0).getReg(); unsigned PhiStage = (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef))); @@ -3269,7 +3357,7 @@ void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB, getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB); if (!NewVal) NewVal = InitVal; - rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &*BBI, + rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI, PhiDef, NewVal); } } @@ -3375,10 +3463,15 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI, if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1)) return false; - // Make sure offset values are both positive or both negative. + // Make sure that the instructions do not access the same memory location in + // the next iteration. int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm(); int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm(); - if ((LoadOffset >= 0) != (StoreOffset >= 0)) + MachineInstr *NewMI = MF.CloneMachineInstr(MI); + NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset); + bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef); + MF.DeleteMachineInstr(NewMI); + if (!Disjoint) return false; // Set the return value once we determine that we return true. @@ -3425,17 +3518,21 @@ void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI, } } -/// Return true for an order dependence that is loop carried potentially. -/// An order dependence is loop carried if the destination defines a value -/// that may be used by the source in a subsequent iteration. -bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep, - bool isSucc) { - if (!isOrder(Source, Dep) || Dep.isArtificial()) +/// Return true for an order or output dependence that is loop carried +/// potentially. A dependence is loop carried if the destination defines a valu +/// that may be used or defined by the source in a subsequent iteration. 
+bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep, + bool isSucc) { + if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) || + Dep.isArtificial()) return false; if (!SwpPruneLoopCarried) return true; + if (Dep.getKind() == SDep::Output) + return true; + MachineInstr *SI = Source->getInstr(); MachineInstr *DI = Dep.getSUnit()->getInstr(); if (!isSucc) @@ -3465,6 +3562,19 @@ bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep, if (BaseRegS != BaseRegD) return true; + // Check that the base register is incremented by a constant value for each + // iteration. + MachineInstr *Def = MRI.getVRegDef(BaseRegS); + if (!Def || !Def->isPHI()) + return true; + unsigned InitVal = 0; + unsigned LoopVal = 0; + getPhiRegs(*Def, BB, InitVal, LoopVal); + MachineInstr *LoopDef = MRI.getVRegDef(LoopVal); + int D = 0; + if (!LoopDef || !TII->getIncrementValue(*LoopDef, D)) + return true; + uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize(); uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize(); @@ -3516,7 +3626,7 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) { } if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) || Resources->canReserveResources(*SU->getInstr())) { - DEBUG({ + LLVM_DEBUG({ dbgs() << "\tinsert at cycle " << curCycle << " "; SU->getInstr()->dump(); }); @@ -3529,7 +3639,7 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) { FirstCycle = curCycle; return true; } - DEBUG({ + LLVM_DEBUG({ dbgs() << "\tfailed to insert at cycle " << curCycle << " "; SU->getInstr()->dump(); }); @@ -3553,7 +3663,7 @@ int SMSchedule::earliestCycleInChain(const SDep &Dep) { continue; EarlyCycle = std::min(EarlyCycle, it->second); for (const auto &PI : PrevSU->Preds) - if (SwingSchedulerDAG::isOrder(PrevSU, PI)) + if (PI.getKind() == SDep::Order || Dep.getKind() == SDep::Output) Worklist.push_back(PI); Visited.insert(PrevSU); } @@ -3576,7 +3686,7 @@ int SMSchedule::latestCycleInChain(const SDep &Dep) { continue; LateCycle = std::max(LateCycle, it->second); for (const auto &SI : SuccSU->Succs) - if (SwingSchedulerDAG::isOrder(SuccSU, SI)) + if (SI.getKind() == SDep::Order || Dep.getKind() == SDep::Output) Worklist.push_back(SI); Visited.insert(SuccSU); } @@ -3590,7 +3700,7 @@ static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) { for (auto &P : SU->Preds) if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI()) for (auto &S : P.getSUnit()->Succs) - if (S.getKind() == SDep::Order && S.getSUnit()->getInstr()->isPHI()) + if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI()) return P.getSUnit(); return nullptr; } @@ -3601,7 +3711,7 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG) { // Iterate over each instruction that has been scheduled already. The start - // slot computuation depends on whether the previously scheduled instruction + // slot computation depends on whether the previously scheduled instruction // is a predecessor or successor of the specified instruction. 
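computeStart, whose body follows, folds every edge against an already scheduled instruction into an earliest/latest start window. The two bounds reduce to the expressions below (illustrative helpers, not part of the patch), where C is the cycle of the placed instruction, L the edge latency, D the loop-carried distance, and II the initiation interval:

  // Illustrative only: forward dependences raise the earliest start of SU,
  // back-edges cap its latest start.
  static int earlyStartBound(int C, int L, int D, int II) { return C + L - D * II; }
  static int lateStartBound(int C, int L, int D, int II)  { return C - L + D * II; }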
for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) { @@ -3613,15 +3723,15 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, const SDep &Dep = SU->Preds[i]; if (Dep.getSUnit() == I) { if (!DAG->isBackedge(SU, Dep)) { - int EarlyStart = cycle + DAG->getLatency(SU, Dep) - + int EarlyStart = cycle + Dep.getLatency() - DAG->getDistance(Dep.getSUnit(), SU, Dep) * II; *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart); - if (DAG->isLoopCarriedOrder(SU, Dep, false)) { + if (DAG->isLoopCarriedDep(SU, Dep, false)) { int End = earliestCycleInChain(Dep) + (II - 1); *MinEnd = std::min(*MinEnd, End); } } else { - int LateStart = cycle - DAG->getLatency(SU, Dep) + + int LateStart = cycle - Dep.getLatency() + DAG->getDistance(SU, Dep.getSUnit(), Dep) * II; *MinLateStart = std::min(*MinLateStart, LateStart); } @@ -3633,23 +3743,24 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, !SU->isPred(I)) *MinLateStart = std::min(*MinLateStart, cycle); } - for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) + for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) { if (SU->Succs[i].getSUnit() == I) { const SDep &Dep = SU->Succs[i]; if (!DAG->isBackedge(SU, Dep)) { - int LateStart = cycle - DAG->getLatency(SU, Dep) + + int LateStart = cycle - Dep.getLatency() + DAG->getDistance(SU, Dep.getSUnit(), Dep) * II; *MinLateStart = std::min(*MinLateStart, LateStart); - if (DAG->isLoopCarriedOrder(SU, Dep)) { + if (DAG->isLoopCarriedDep(SU, Dep)) { int Start = latestCycleInChain(Dep) + 1 - II; *MaxStart = std::max(*MaxStart, Start); } } else { - int EarlyStart = cycle + DAG->getLatency(SU, Dep) - + int EarlyStart = cycle + Dep.getLatency() - DAG->getDistance(Dep.getSUnit(), SU, Dep) * II; *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart); } } + } } } } @@ -3657,7 +3768,7 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, /// Order the instructions within a cycle so that the definitions occur /// before the uses. Returns true if the instruction is added to the start /// of the list, or false if added to the end. -bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, +void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, std::deque<SUnit *> &Insts) { MachineInstr *MI = SU->getInstr(); bool OrderBeforeUse = false; @@ -3670,13 +3781,11 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, unsigned Pos = 0; for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E; ++I, ++Pos) { - // Relative order of Phis does not matter. - if (MI->isPHI() && (*I)->getInstr()->isPHI()) - continue; for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; + unsigned Reg = MO.getReg(); unsigned BasePos, OffsetPos; if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) @@ -3688,7 +3797,8 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, (*I)->getInstr()->readsWritesVirtualRegister(Reg); if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) { OrderBeforeUse = true; - MoveUse = Pos; + if (MoveUse == 0) + MoveUse = Pos; } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) { // Add the instruction after the scheduled instruction. 
OrderAfterDef = true; @@ -3696,14 +3806,16 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) { if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) { OrderBeforeUse = true; - MoveUse = Pos; + if (MoveUse == 0) + MoveUse = Pos; } else { OrderAfterDef = true; MoveDef = Pos; } } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) { OrderBeforeUse = true; - MoveUse = Pos; + if (MoveUse == 0) + MoveUse = Pos; if (MoveUse != 0) { OrderAfterDef = true; MoveDef = Pos - 1; @@ -3711,49 +3823,35 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) { // Add the instruction before the scheduled instruction. OrderBeforeUse = true; - MoveUse = Pos; + if (MoveUse == 0) + MoveUse = Pos; } else if (MO.isUse() && stageScheduled(*I) == StageInst1 && isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) { - OrderBeforeDef = true; - MoveUse = Pos; + if (MoveUse == 0) { + OrderBeforeDef = true; + MoveUse = Pos; + } } } // Check for order dependences between instructions. Make sure the source // is ordered before the destination. - for (auto &S : SU->Succs) - if (S.getKind() == SDep::Order) { - if (S.getSUnit() == *I && stageScheduled(*I) == StageInst1) { - OrderBeforeUse = true; - MoveUse = Pos; - } - } else if (TargetRegisterInfo::isPhysicalRegister(S.getReg())) { - if (cycleScheduled(SU) != cycleScheduled(S.getSUnit())) { - if (S.isAssignedRegDep()) { - OrderAfterDef = true; - MoveDef = Pos; - } - } else { - OrderBeforeUse = true; + for (auto &S : SU->Succs) { + if (S.getSUnit() != *I) + continue; + if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) { + OrderBeforeUse = true; + if (Pos < MoveUse) MoveUse = Pos; - } } - for (auto &P : SU->Preds) - if (P.getKind() == SDep::Order) { - if (P.getSUnit() == *I && stageScheduled(*I) == StageInst1) { - OrderAfterDef = true; - MoveDef = Pos; - } - } else if (TargetRegisterInfo::isPhysicalRegister(P.getReg())) { - if (cycleScheduled(SU) != cycleScheduled(P.getSUnit())) { - if (P.isAssignedRegDep()) { - OrderBeforeUse = true; - MoveUse = Pos; - } - } else { - OrderAfterDef = true; - MoveDef = Pos; - } + } + for (auto &P : SU->Preds) { + if (P.getSUnit() != *I) + continue; + if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) { + OrderAfterDef = true; + MoveDef = Pos; } + } } // A circular dependence. @@ -3777,16 +3875,10 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, Insts.erase(Insts.begin() + MoveDef); Insts.erase(Insts.begin() + MoveUse); } - if (orderDependence(SSD, UseSU, Insts)) { - Insts.push_front(SU); - orderDependence(SSD, DefSU, Insts); - return true; - } - Insts.pop_back(); - Insts.push_back(SU); - Insts.push_back(UseSU); + orderDependence(SSD, UseSU, Insts); + orderDependence(SSD, SU, Insts); orderDependence(SSD, DefSU, Insts); - return false; + return; } // Put the new instruction first if there is a use in the list. Otherwise, // put it at the end of the list. @@ -3794,14 +3886,13 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, Insts.push_front(SU); else Insts.push_back(SU); - return OrderBeforeUse; } /// Return true if the scheduled Phi has a loop carried operand. 
bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) { if (!Phi.isPHI()) return false; - assert(Phi.isPHI() && "Expecing a Phi."); + assert(Phi.isPHI() && "Expecting a Phi."); SUnit *DefSU = SSD->getSUnit(&Phi); unsigned DefCycle = cycleScheduled(DefSU); int DefStage = stageScheduled(DefSU); @@ -3868,6 +3959,100 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) { return true; } +/// A property of the node order in swing-modulo-scheduling is +/// that for nodes outside circuits the following holds: +/// none of them is scheduled after both a successor and a +/// predecessor. +/// The method below checks whether the property is met. +/// If not, debug information is printed and statistics information updated. +/// Note that we do not use an assert statement. +/// The reason is that although an invalid node oder may prevent +/// the pipeliner from finding a pipelined schedule for arbitrary II, +/// it does not lead to the generation of incorrect code. +void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const { + + // a sorted vector that maps each SUnit to its index in the NodeOrder + typedef std::pair<SUnit *, unsigned> UnitIndex; + std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0)); + + for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) + Indices.push_back(std::make_pair(NodeOrder[i], i)); + + auto CompareKey = [](UnitIndex i1, UnitIndex i2) { + return std::get<0>(i1) < std::get<0>(i2); + }; + + // sort, so that we can perform a binary search + llvm::sort(Indices.begin(), Indices.end(), CompareKey); + + bool Valid = true; + (void)Valid; + // for each SUnit in the NodeOrder, check whether + // it appears after both a successor and a predecessor + // of the SUnit. If this is the case, and the SUnit + // is not part of circuit, then the NodeOrder is not + // valid. + for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) { + SUnit *SU = NodeOrder[i]; + unsigned Index = i; + + bool PredBefore = false; + bool SuccBefore = false; + + SUnit *Succ; + SUnit *Pred; + (void)Succ; + (void)Pred; + + for (SDep &PredEdge : SU->Preds) { + SUnit *PredSU = PredEdge.getSUnit(); + unsigned PredIndex = + std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(), + std::make_pair(PredSU, 0), CompareKey)); + if (!PredSU->getInstr()->isPHI() && PredIndex < Index) { + PredBefore = true; + Pred = PredSU; + break; + } + } + + for (SDep &SuccEdge : SU->Succs) { + SUnit *SuccSU = SuccEdge.getSUnit(); + unsigned SuccIndex = + std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(), + std::make_pair(SuccSU, 0), CompareKey)); + if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) { + SuccBefore = true; + Succ = SuccSU; + break; + } + } + + if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) { + // instructions in circuits are allowed to be scheduled + // after both a successor and predecessor. 
+ bool InCircuit = std::any_of( + Circuits.begin(), Circuits.end(), + [SU](const NodeSet &Circuit) { return Circuit.count(SU); }); + if (InCircuit) + LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";); + else { + Valid = false; + NumNodeOrderIssues++; + LLVM_DEBUG(dbgs() << "Predecessor ";); + } + LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum + << " are scheduled before node " << SU->NodeNum + << "\n";); + } + } + + LLVM_DEBUG({ + if (!Valid) + dbgs() << "Invalid node order found!\n"; + }); +} + /// Attempt to fix the degenerate cases when the instruction serialization /// causes the register lifetimes to overlap. For example, /// p' = store_pi(p, b) @@ -3987,27 +4172,25 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) { // generated code. for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) { std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle]; - std::deque<SUnit *> newOrderZC; - // Put the zero-cost, pseudo instructions at the start of the cycle. + std::deque<SUnit *> newOrderPhi; for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) { SUnit *SU = cycleInstrs[i]; - if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode())) - orderDependence(SSD, SU, newOrderZC); + if (SU->getInstr()->isPHI()) + newOrderPhi.push_back(SU); } std::deque<SUnit *> newOrderI; - // Then, add the regular instructions back. for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) { SUnit *SU = cycleInstrs[i]; - if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode())) + if (!SU->getInstr()->isPHI()) orderDependence(SSD, SU, newOrderI); } // Replace the old order with the new order. - cycleInstrs.swap(newOrderZC); + cycleInstrs.swap(newOrderPhi); cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end()); SSD->fixupRegisterOverlaps(cycleInstrs); } - DEBUG(dump();); + LLVM_DEBUG(dump();); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp index 1e74104e89ed..2619d8f78276 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/RegionInfoImpl.h" #include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/Config/llvm-config.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" @@ -89,7 +90,7 @@ bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) { RI.recalculate(F, DT, PDT, DF); - DEBUG(RI.dump()); + LLVM_DEBUG(RI.dump()); return false; } diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp index b82ab02a6e6c..6095bdd06b69 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" @@ -65,23 +66,66 @@ void MachineRegisterInfo::setRegBank(unsigned Reg, VRegInfo[Reg].first = &RegBank; } -const TargetRegisterClass * -MachineRegisterInfo::constrainRegClass(unsigned Reg, - const TargetRegisterClass *RC, - unsigned MinNumRegs) { - const TargetRegisterClass *OldRC = getRegClass(Reg); +static const TargetRegisterClass * 
+constrainRegClass(MachineRegisterInfo &MRI, unsigned Reg, + const TargetRegisterClass *OldRC, + const TargetRegisterClass *RC, unsigned MinNumRegs) { if (OldRC == RC) return RC; const TargetRegisterClass *NewRC = - getTargetRegisterInfo()->getCommonSubClass(OldRC, RC); + MRI.getTargetRegisterInfo()->getCommonSubClass(OldRC, RC); if (!NewRC || NewRC == OldRC) return NewRC; if (NewRC->getNumRegs() < MinNumRegs) return nullptr; - setRegClass(Reg, NewRC); + MRI.setRegClass(Reg, NewRC); return NewRC; } +const TargetRegisterClass * +MachineRegisterInfo::constrainRegClass(unsigned Reg, + const TargetRegisterClass *RC, + unsigned MinNumRegs) { + return ::constrainRegClass(*this, Reg, getRegClass(Reg), RC, MinNumRegs); +} + +bool +MachineRegisterInfo::constrainRegAttrs(unsigned Reg, + unsigned ConstrainingReg, + unsigned MinNumRegs) { + auto const *OldRC = getRegClassOrNull(Reg); + auto const *RC = getRegClassOrNull(ConstrainingReg); + // A virtual register at any point must have either a low-level type + // or a class assigned, but not both. The only exception is the internals of + // GlobalISel's instruction selection pass, which is allowed to temporarily + // introduce registers with types and classes both. + assert((OldRC || getType(Reg).isValid()) && "Reg has neither class nor type"); + assert((!OldRC || !getType(Reg).isValid()) && "Reg has class and type both"); + assert((RC || getType(ConstrainingReg).isValid()) && + "ConstrainingReg has neither class nor type"); + assert((!RC || !getType(ConstrainingReg).isValid()) && + "ConstrainingReg has class and type both"); + if (OldRC && RC) + return ::constrainRegClass(*this, Reg, OldRC, RC, MinNumRegs); + // If one of the virtual registers is generic (used in generic machine + // instructions, has a low-level type, doesn't have a class), and the other is + // concrete (used in target specific instructions, doesn't have a low-level + // type, has a class), we can not unify them. + if (OldRC || RC) + return false; + // At this point, both registers are guaranteed to have a valid low-level + // type, and they must agree. + if (getType(Reg) != getType(ConstrainingReg)) + return false; + auto const *OldRB = getRegBankOrNull(Reg); + auto const *RB = getRegBankOrNull(ConstrainingReg); + if (OldRB) + return !RB || RB == OldRB; + if (RB) + setRegBank(Reg, *RB); + return true; +} + bool MachineRegisterInfo::recomputeRegClass(unsigned Reg) { const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -107,10 +151,11 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) { return true; } -unsigned MachineRegisterInfo::createIncompleteVirtualRegister() { +unsigned MachineRegisterInfo::createIncompleteVirtualRegister(StringRef Name) { unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); VRegInfo.grow(Reg); RegAllocHints.grow(Reg); + insertVRegByName(Name, Reg); return Reg; } @@ -118,47 +163,42 @@ unsigned MachineRegisterInfo::createIncompleteVirtualRegister() { /// function with the specified register class. /// unsigned -MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ +MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass, + StringRef Name) { assert(RegClass && "Cannot create register without RegClass!"); assert(RegClass->isAllocatable() && "Virtual register RegClass must be allocatable."); // New virtual register number. 
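The class-versus-type invariant that the new constrainRegAttrs relies on can be condensed to a few lines. A rough sketch over a simplified register model follows; everything below is invented for illustration, and the real method additionally narrows the class through getCommonSubClass and honors MinNumRegs before accepting the concrete/concrete case.

#include <optional>

struct RegAttrs {
  std::optional<int> Class; // concrete registers carry a register class...
  std::optional<int> Type;  // ...generic registers carry a low-level type
  std::optional<int> Bank;  // generic registers may also carry a bank
};

// Returns true if Reg can be constrained to be compatible with Other.
bool canConstrain(RegAttrs &Reg, const RegAttrs &Other) {
  if (Reg.Class && Other.Class)
    return true;            // both concrete: defer to class constraining
  if (Reg.Class || Other.Class)
    return false;           // one generic, one concrete: cannot unify
  if (Reg.Type != Other.Type)
    return false;           // both generic: low-level types must agree
  if (Reg.Bank)
    return !Other.Bank || *Reg.Bank == *Other.Bank;
  if (Other.Bank)
    Reg.Bank = Other.Bank;  // adopt the constraining register's bank
  return true;
}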
- unsigned Reg = createIncompleteVirtualRegister(); + unsigned Reg = createIncompleteVirtualRegister(Name); VRegInfo[Reg].first = RegClass; if (TheDelegate) TheDelegate->MRI_NoteNewVirtualRegister(Reg); return Reg; } -LLT MachineRegisterInfo::getType(unsigned VReg) const { - VRegToTypeMap::const_iterator TypeIt = getVRegToType().find(VReg); - return TypeIt != getVRegToType().end() ? TypeIt->second : LLT{}; -} - void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) { // Check that VReg doesn't have a class. assert((getRegClassOrRegBank(VReg).isNull() || !getRegClassOrRegBank(VReg).is<const TargetRegisterClass *>()) && "Can't set the size of a non-generic virtual register"); - getVRegToType()[VReg] = Ty; + VRegToType.grow(VReg); + VRegToType[VReg] = Ty; } unsigned -MachineRegisterInfo::createGenericVirtualRegister(LLT Ty) { +MachineRegisterInfo::createGenericVirtualRegister(LLT Ty, StringRef Name) { // New virtual register number. - unsigned Reg = createIncompleteVirtualRegister(); + unsigned Reg = createIncompleteVirtualRegister(Name); // FIXME: Should we use a dummy register class? VRegInfo[Reg].first = static_cast<RegisterBank *>(nullptr); - getVRegToType()[Reg] = Ty; + setType(Reg, Ty); if (TheDelegate) TheDelegate->MRI_NoteNewVirtualRegister(Reg); return Reg; } -void MachineRegisterInfo::clearVirtRegTypes() { - getVRegToType().clear(); -} +void MachineRegisterInfo::clearVirtRegTypes() { VRegToType.clear(); } /// clearVirtRegs - Remove all virtual registers (after physreg assignment). void MachineRegisterInfo::clearVirtRegs() { diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp index 36844e9fb30a..773661965f18 100644 --- a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -204,7 +204,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { // If the client wants to know about all new instructions, tell it. 
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); - DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); + LLVM_DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); return InsertedPHI->getOperand(0).getReg(); } diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp index e15eb658a05c..502d18f08f93 100644 --- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -32,7 +32,6 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterPressure.h" @@ -48,6 +47,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -55,6 +55,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> @@ -271,7 +272,7 @@ priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg) { assert(I != Beg && "reached the top of the region, cannot decrement"); while (--I != Beg) { - if (!I->isDebugValue()) + if (!I->isDebugInstr()) break; } return I; @@ -291,7 +292,7 @@ static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End) { for(; I != End; ++I) { - if (!I->isDebugValue()) + if (!I->isDebugInstr()) break; } return I; @@ -344,7 +345,7 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() { /// This design avoids exposing scheduling boundaries to the DAG builder, /// simplifying the DAG builder's support for "special" target instructions. /// At the same time the design allows target schedulers to operate across -/// scheduling boundaries, for example to bundle the boudary instructions +/// scheduling boundaries, for example to bundle the boundary instructions /// without reordering them. This creates complexity, because the target /// scheduler must update the RegionBegin and RegionEnd positions cached by /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler @@ -360,7 +361,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { } else if (!mf.getSubtarget().enableMachineScheduler()) return false; - DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs())); + LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs())); // Initialize the context of the pass. 
MF = &mf; @@ -372,7 +373,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { LIS = &getAnalysis<LiveIntervals>(); if (VerifyScheduling) { - DEBUG(LIS->dump()); + LLVM_DEBUG(LIS->dump()); MF->verify(this, "Before machine scheduling."); } RegClassInfo->runOnMachineFunction(*MF); @@ -382,7 +383,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); scheduleRegions(*Scheduler, false); - DEBUG(LIS->dump()); + LLVM_DEBUG(LIS->dump()); if (VerifyScheduling) MF->verify(this, "After machine scheduling."); return true; @@ -396,10 +397,10 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { if (!EnablePostRAMachineSched) return false; } else if (!mf.getSubtarget().enablePostRAScheduler()) { - DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n"); + LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n"); return false; } - DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs())); + LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs())); // Initialize the context of the pass. MF = &mf; @@ -481,7 +482,7 @@ getSchedRegions(MachineBasicBlock *MBB, MachineInstr &MI = *std::prev(I); if (isSchedBoundary(&MI, &*MBB, MF, TII)) break; - if (!MI.isDebugValue()) + if (!MI.isDebugInstr()) // MBB::size() uses instr_iterator to count. Here we need a bundle to // count as a single instruction. ++NumRegionInstrs; @@ -547,12 +548,13 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, Scheduler.exitRegion(); continue; } - DEBUG(dbgs() << "********** MI Scheduling **********\n"); - DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB) << " " - << MBB->getName() << "\n From: " << *I << " To: "; - if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; - else dbgs() << "End"; - dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); + LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n"); + LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB) + << " " << MBB->getName() << "\n From: " << *I + << " To: "; + if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; + else dbgs() << "End"; + dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); if (DumpCriticalPathLength) { errs() << MF->getName(); errs() << ":%bb. " << MBB->getNumber(); @@ -749,8 +751,8 @@ bool ScheduleDAGMI::checkSchedLimit() { /// does not consider liveness or register pressure. It is useful for PostRA /// scheduling and potentially other custom schedulers. void ScheduleDAGMI::schedule() { - DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n"); - DEBUG(SchedImpl->dumpPolicy()); + LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n"); + LLVM_DEBUG(SchedImpl->dumpPolicy()); // Build the DAG. buildSchedGraph(AA); @@ -762,26 +764,22 @@ void ScheduleDAGMI::schedule() { SmallVector<SUnit*, 8> TopRoots, BotRoots; findRootsAndBiasEdges(TopRoots, BotRoots); + LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this); + for (const SUnit &SU + : SUnits) SU.dumpAll(this); + if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this);); + if (ViewMISchedDAGs) viewGraph(); + // Initialize the strategy before modifying the DAG. // This may initialize a DFSResult to be used for queue priority. 
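For orientation, the region formation in getSchedRegions above is essentially a bottom-up partition of the block at scheduling boundaries, with debug instructions excluded from the size count. A much-simplified sketch, with all names invented rather than taken from the pass's API:

#include <cstddef>
#include <vector>

// Simplified stand-ins for the MachineInstr queries used when forming regions.
struct Instr {
  bool IsBoundary; // e.g. calls/labels the scheduler must not move across
  bool IsDebug;    // DBG_VALUE etc.: kept in place and never counted
};

struct Region {
  size_t Begin, End;      // half-open range [Begin, End) into the block
  unsigned NumRealInstrs; // region size excluding debug instructions
};

// Split a block into scheduling regions, scanning bottom-up as the pass does;
// boundary instructions terminate a region and belong to none.
std::vector<Region> splitRegions(const std::vector<Instr> &Block) {
  std::vector<Region> Regions;
  size_t End = Block.size();
  while (End > 0) {
    unsigned Count = 0;
    size_t Begin = End;
    while (Begin > 0 && !Block[Begin - 1].IsBoundary) {
      --Begin;
      if (!Block[Begin].IsDebug)
        ++Count;
    }
    if (Begin != End)
      Regions.push_back({Begin, End, Count});
    End = (Begin > 0) ? Begin - 1 : 0; // skip over the boundary instruction
  }
  return Regions;
}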
SchedImpl->initialize(this); - DEBUG( - if (EntrySU.getInstr() != nullptr) - EntrySU.dumpAll(this); - for (const SUnit &SU : SUnits) - SU.dumpAll(this); - if (ExitSU.getInstr() != nullptr) - ExitSU.dumpAll(this); - ); - if (ViewMISchedDAGs) viewGraph(); - // Initialize ready queues now that the DAG and priority data are finalized. initQueues(TopRoots, BotRoots); bool IsTopNode = false; while (true) { - DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n"); + LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n"); SUnit *SU = SchedImpl->pickNode(IsTopNode); if (!SU) break; @@ -821,7 +819,7 @@ void ScheduleDAGMI::schedule() { placeDebugValues(); - DEBUG({ + LLVM_DEBUG({ dbgs() << "*** Final schedule for " << printMBBReference(*begin()->getParent()) << " ***\n"; dumpSchedule(); @@ -1016,7 +1014,7 @@ void ScheduleDAGMILive::initRegPressure() { // Close the RPTracker to finalize live ins. RPTracker.closeRegion(); - DEBUG(RPTracker.dump()); + LLVM_DEBUG(RPTracker.dump()); // Initialize the live ins and live outs. TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); @@ -1031,8 +1029,8 @@ void ScheduleDAGMILive::initRegPressure() { BotRPTracker.initLiveThru(RPTracker); if (!BotRPTracker.getLiveThru().empty()) { TopRPTracker.initLiveThru(BotRPTracker.getLiveThru()); - DEBUG(dbgs() << "Live Thru: "; - dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI)); + LLVM_DEBUG(dbgs() << "Live Thru: "; + dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI)); }; // For each live out vreg reduce the pressure change associated with other @@ -1046,15 +1044,13 @@ void ScheduleDAGMILive::initRegPressure() { updatePressureDiffs(LiveUses); } - DEBUG( - dbgs() << "Top Pressure:\n"; - dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); - dbgs() << "Bottom Pressure:\n"; - dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); - ); + LLVM_DEBUG(dbgs() << "Top Pressure:\n"; + dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); + dbgs() << "Bottom Pressure:\n"; + dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);); assert((BotRPTracker.getPos() == RegionEnd || - (RegionEnd->isDebugValue() && + (RegionEnd->isDebugInstr() && BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) && "Can't find the region bottom"); @@ -1066,17 +1062,16 @@ void ScheduleDAGMILive::initRegPressure() { for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { unsigned Limit = RegClassInfo->getRegPressureSetLimit(i); if (RegionPressure[i] > Limit) { - DEBUG(dbgs() << TRI->getRegPressureSetName(i) - << " Limit " << Limit - << " Actual " << RegionPressure[i] << "\n"); + LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit + << " Actual " << RegionPressure[i] << "\n"); RegionCriticalPSets.push_back(PressureChange(i)); } } - DEBUG(dbgs() << "Excess PSets: "; - for (const PressureChange &RCPS : RegionCriticalPSets) - dbgs() << TRI->getRegPressureSetName( - RCPS.getPSet()) << " "; - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Excess PSets: "; + for (const PressureChange &RCPS + : RegionCriticalPSets) dbgs() + << TRI->getRegPressureSetName(RCPS.getPSet()) << " "; + dbgs() << "\n"); } void ScheduleDAGMILive:: @@ -1097,10 +1092,11 @@ updateScheduledPressure(const SUnit *SU, } unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID); if (NewMaxPressure[ID] >= Limit - 2) { - DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": " - << NewMaxPressure[ID] - << ((NewMaxPressure[ID] > Limit) ? 
" > " : " <= ") << Limit - << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); + LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": " + << NewMaxPressure[ID] + << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") + << Limit << "(+ " << BotRPTracker.getLiveThru()[ID] + << " livethru)\n"); } } } @@ -1130,17 +1126,14 @@ void ScheduleDAGMILive::updatePressureDiffs( PressureDiff &PDiff = getPressureDiff(&SU); PDiff.addPressureChange(Reg, Decrement, &MRI); - DEBUG( - dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") " - << printReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask) - << ' ' << *SU.getInstr(); - dbgs() << " to "; - PDiff.dump(*TRI); - ); + LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") " + << printReg(Reg, TRI) << ':' + << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr(); + dbgs() << " to "; PDiff.dump(*TRI);); } } else { assert(P.LaneMask.any()); - DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n"); + LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n"); // This may be called before CurrentBottom has been initialized. However, // BotRPTracker must have a valid position. We want the value live into the // instruction or live out of the block, so ask for the previous @@ -1168,12 +1161,9 @@ void ScheduleDAGMILive::updatePressureDiffs( if (LRQ.valueIn() == VNI) { PressureDiff &PDiff = getPressureDiff(SU); PDiff.addPressureChange(Reg, true, &MRI); - DEBUG( - dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " - << *SU->getInstr(); - dbgs() << " to "; - PDiff.dump(*TRI); - ); + LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " + << *SU->getInstr(); + dbgs() << " to "; PDiff.dump(*TRI);); } } } @@ -1192,8 +1182,8 @@ void ScheduleDAGMILive::updatePressureDiffs( /// ScheduleDAGMILive then it will want to override this virtual method in order /// to update any specialized state. void ScheduleDAGMILive::schedule() { - DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n"); - DEBUG(SchedImpl->dumpPolicy()); + LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n"); + LLVM_DEBUG(SchedImpl->dumpPolicy()); buildDAGWithRegPressure(); Topo.InitDAGTopologicalSorting(); @@ -1207,26 +1197,22 @@ void ScheduleDAGMILive::schedule() { // This may initialize a DFSResult to be used for queue priority. SchedImpl->initialize(this); - DEBUG( - if (EntrySU.getInstr() != nullptr) - EntrySU.dumpAll(this); - for (const SUnit &SU : SUnits) { - SU.dumpAll(this); - if (ShouldTrackPressure) { - dbgs() << " Pressure Diff : "; - getPressureDiff(&SU).dump(*TRI); - } - dbgs() << " Single Issue : "; - if (SchedModel.mustBeginGroup(SU.getInstr()) && - SchedModel.mustEndGroup(SU.getInstr())) - dbgs() << "true;"; - else - dbgs() << "false;"; - dbgs() << '\n'; - } - if (ExitSU.getInstr() != nullptr) - ExitSU.dumpAll(this); - ); + LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this); + for (const SUnit &SU + : SUnits) { + SU.dumpAll(this); + if (ShouldTrackPressure) { + dbgs() << " Pressure Diff : "; + getPressureDiff(&SU).dump(*TRI); + } + dbgs() << " Single Issue : "; + if (SchedModel.mustBeginGroup(SU.getInstr()) && + SchedModel.mustEndGroup(SU.getInstr())) + dbgs() << "true;"; + else + dbgs() << "false;"; + dbgs() << '\n'; + } if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this);); if (ViewMISchedDAGs) viewGraph(); // Initialize ready queues now that the DAG and priority data are finalized. 
@@ -1234,7 +1220,7 @@ void ScheduleDAGMILive::schedule() { bool IsTopNode = false; while (true) { - DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n"); + LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n"); SUnit *SU = SchedImpl->pickNode(IsTopNode); if (!SU) break; @@ -1262,7 +1248,7 @@ void ScheduleDAGMILive::schedule() { placeDebugValues(); - DEBUG({ + LLVM_DEBUG({ dbgs() << "*** Final schedule for " << printMBBReference(*begin()->getParent()) << " ***\n"; dumpSchedule(); @@ -1379,13 +1365,13 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { } else CyclicLatency = 0; - DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" - << SU->NodeNum << ") = " << CyclicLatency << "c\n"); + LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" + << SU->NodeNum << ") = " << CyclicLatency << "c\n"); if (CyclicLatency > MaxCyclicLatency) MaxCyclicLatency = CyclicLatency; } } - DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n"); + LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n"); return MaxCyclicLatency; } @@ -1429,10 +1415,8 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { TopRPTracker.advance(RegOpers); assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); - DEBUG( - dbgs() << "Top Pressure:\n"; - dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); - ); + LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure( + TopRPTracker.getRegSetPressureAtPos(), TRI);); updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure); } @@ -1449,6 +1433,7 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { } moveInstruction(MI, CurrentBottom); CurrentBottom = MI; + BotRPTracker.setPos(CurrentBottom); } if (ShouldTrackPressure) { RegisterOperands RegOpers; @@ -1467,10 +1452,8 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { SmallVector<RegisterMaskPair, 8> LiveUses; BotRPTracker.recede(RegOpers, &LiveUses); assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); - DEBUG( - dbgs() << "Bottom Pressure:\n"; - dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); - ); + LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure( + BotRPTracker.getRegSetPressureAtPos(), TRI);); updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure); updatePressureDiffs(LiveUses); @@ -1484,7 +1467,7 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { namespace { -/// \brief Post-process the DAG to create cluster edges between neighboring +/// Post-process the DAG to create cluster edges between neighboring /// loads or between neighboring stores. 
class BaseMemOpClusterMutation : public ScheduleDAGMutation { struct MemOpInfo { @@ -1561,7 +1544,7 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( if (MemOpRecords.size() < 2) return; - std::sort(MemOpRecords.begin(), MemOpRecords.end()); + llvm::sort(MemOpRecords.begin(), MemOpRecords.end()); unsigned ClusterLength = 1; for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) { SUnit *SUa = MemOpRecords[Idx].SU; @@ -1570,8 +1553,8 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg, ClusterLength) && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { - DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" - << SUb->NodeNum << ")\n"); + LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" + << SUb->NodeNum << ")\n"); // Copy successor edges from SUa to SUb. Interleaving computation // dependent on SUa can prevent load combining due to register reuse. // Predecessor edges do not need to be copied from SUb to SUa since nearby @@ -1579,7 +1562,8 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( for (const SDep &Succ : SUa->Succs) { if (Succ.getSUnit() == SUb) continue; - DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n"); + LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum + << ")\n"); DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); } ++ClusterLength; @@ -1588,7 +1572,7 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( } } -/// \brief Callback from DAG postProcessing to create cluster edges for loads. +/// Callback from DAG postProcessing to create cluster edges for loads. void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) { ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); @@ -1629,7 +1613,7 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) { namespace { -/// \brief Post-process the DAG to create weak edges from all uses of a copy to +/// Post-process the DAG to create weak edges from all uses of a copy to /// the one use that defines the copy's source vreg, most likely an induction /// variable increment. class CopyConstrain : public ScheduleDAGMutation { @@ -1724,7 +1708,7 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { // If GlobalSegment is killed at the LocalLI->start, the call to find() // returned the next global segment. But if GlobalSegment overlaps with - // LocalLI->start, then advance to the next segement. If a hole in GlobalLI + // LocalLI->start, then advance to the next segment. If a hole in GlobalLI // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole. if (GlobalSegment->contains(LocalLI->beginIndex())) ++GlobalSegment; @@ -1788,23 +1772,23 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { return; GlobalUses.push_back(Pred.getSUnit()); } - DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n"); + LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n"); // Add the weak edges. 
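The clustering walk in clusterNeighboringMemOps above boils down to: sort the candidate memory operations by (base register, offset), then chain adjacent entries for as long as the target keeps accepting the growing cluster. A rough standalone sketch of that shape, where MemOp and both hooks are placeholders for the SUnit / TargetInstrInfo machinery:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

struct MemOp {
  unsigned BaseReg;
  int64_t Offset;
  unsigned NodeId;
  bool operator<(const MemOp &RHS) const {
    return BaseReg != RHS.BaseReg ? BaseReg < RHS.BaseReg
                                  : Offset < RHS.Offset;
  }
};

// Stub target hook for the sketch: allow clusters of at most four ops.
static bool shouldCluster(const MemOp &, const MemOp &, unsigned Length) {
  return Length < 4;
}
// Stub DAG hook for the sketch: would record a cluster edge between two nodes.
static void addClusterEdge(unsigned /*From*/, unsigned /*To*/) {}

static void clusterNeighboringOps(std::vector<MemOp> &Ops) {
  if (Ops.size() < 2)
    return;
  std::sort(Ops.begin(), Ops.end());
  unsigned ClusterLength = 1;
  for (size_t i = 0; i + 1 < Ops.size(); ++i) {
    if (Ops[i].BaseReg == Ops[i + 1].BaseReg &&
        shouldCluster(Ops[i], Ops[i + 1], ClusterLength)) {
      addClusterEdge(Ops[i].NodeId, Ops[i + 1].NodeId);
      ++ClusterLength;
    } else {
      ClusterLength = 1; // start counting a new cluster
    }
  }
}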
for (SmallVectorImpl<SUnit*>::const_iterator I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) { - DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU(" - << GlobalSU->NodeNum << ")\n"); + LLVM_DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU(" + << GlobalSU->NodeNum << ")\n"); DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak)); } for (SmallVectorImpl<SUnit*>::const_iterator I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) { - DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU(" - << FirstLocalSU->NodeNum << ")\n"); + LLVM_DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU(" + << FirstLocalSU->NodeNum << ")\n"); DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak)); } } -/// \brief Callback from DAG postProcessing to create weak edges to encourage +/// Callback from DAG postProcessing to create weak edges to encourage /// copy elimination. void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) { ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); @@ -1941,7 +1925,7 @@ getNextResourceCycle(unsigned PIdx, unsigned Cycles) { /// The scheduler supports two modes of hazard recognition. The first is the /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that /// supports highly complicated in-order reservation tables -/// (ScoreboardHazardRecognizer) and arbitraty target-specific logic. +/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. /// /// The second is a streamlined mechanism that checks for hazards based on /// simple counters that the scheduler itself maintains. It explicitly checks @@ -1957,16 +1941,16 @@ bool SchedBoundary::checkHazard(SUnit *SU) { unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { - DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" - << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); + LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" + << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); return true; } if (CurrMOps > 0 && ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) || (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) { - DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must " - << (isTop()? "begin" : "end") << " group\n"); + LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must " + << (isTop() ? 
"begin" : "end") << " group\n"); return true; } @@ -1982,9 +1966,9 @@ bool SchedBoundary::checkHazard(SUnit *SU) { #ifndef NDEBUG MaxObservedStall = std::max(Cycles, MaxObservedStall); #endif - DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " - << SchedModel->getResourceName(ResIdx) - << "=" << NRCycle << "c\n"); + LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " + << SchedModel->getResourceName(ResIdx) << "=" + << NRCycle << "c\n"); return true; } } @@ -2005,8 +1989,8 @@ findMaxLatency(ArrayRef<SUnit*> ReadySUs) { } } if (LateSU) { - DEBUG(dbgs() << Available.getName() << " RemLatency SU(" - << LateSU->NodeNum << ") " << RemLatency << "c\n"); + LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU(" + << LateSU->NodeNum << ") " << RemLatency << "c\n"); } return RemLatency; } @@ -2022,8 +2006,8 @@ getOtherResourceCount(unsigned &OtherCritIdx) { unsigned OtherCritCount = Rem->RemIssueCount + (RetiredMOps * SchedModel->getMicroOpFactor()); - DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: " - << OtherCritCount / SchedModel->getMicroOpFactor() << '\n'); + LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: " + << OtherCritCount / SchedModel->getMicroOpFactor() << '\n'); for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds(); PIdx != PEnd; ++PIdx) { unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx]; @@ -2033,9 +2017,10 @@ getOtherResourceCount(unsigned &OtherCritIdx) { } } if (OtherCritIdx) { - DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: " - << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) - << " " << SchedModel->getResourceName(OtherCritIdx) << "\n"); + LLVM_DEBUG( + dbgs() << " " << Available.getName() << " + Remain CritRes: " + << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) + << " " << SchedModel->getResourceName(OtherCritIdx) << "\n"); } return OtherCritCount; } @@ -2099,7 +2084,8 @@ void SchedBoundary::bumpCycle(unsigned NextCycle) { checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), getScheduledLatency()); - DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); + LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() + << '\n'); } void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { @@ -2119,8 +2105,8 @@ unsigned SchedBoundary:: countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { unsigned Factor = SchedModel->getResourceFactor(PIdx); unsigned Count = Factor * Cycles; - DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) - << " +" << Cycles << "x" << Factor << "u\n"); + LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +" + << Cycles << "x" << Factor << "u\n"); // Update Executed resources counts. incExecutedResources(PIdx, Count); @@ -2131,16 +2117,17 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { // becomes the critical resource. if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) { ZoneCritResIdx = PIdx; - DEBUG(dbgs() << " *** Critical resource " - << SchedModel->getResourceName(PIdx) << ": " - << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n"); + LLVM_DEBUG(dbgs() << " *** Critical resource " + << SchedModel->getResourceName(PIdx) << ": " + << getResourceCount(PIdx) / SchedModel->getLatencyFactor() + << "c\n"); } // For reserved resources, record the highest cycle using the resource. 
unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles); if (NextAvailable > CurrCycle) { - DEBUG(dbgs() << " Resource conflict: " - << SchedModel->getProcResource(PIdx)->Name << " reserved until @" - << NextAvailable << "\n"); + LLVM_DEBUG(dbgs() << " Resource conflict: " + << SchedModel->getProcResource(PIdx)->Name + << " reserved until @" << NextAvailable << "\n"); } return NextAvailable; } @@ -2165,7 +2152,7 @@ void SchedBoundary::bumpNode(SUnit *SU) { "Cannot schedule this instruction's MicroOps in the current cycle."); unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); - DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); + LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); unsigned NextCycle = CurrCycle; switch (SchedModel->getMicroOpBufferSize()) { @@ -2175,7 +2162,7 @@ void SchedBoundary::bumpNode(SUnit *SU) { case 1: if (ReadyCycle > NextCycle) { NextCycle = ReadyCycle; - DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n"); + LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n"); } break; default: @@ -2204,8 +2191,9 @@ void SchedBoundary::bumpNode(SUnit *SU) { if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) >= (int)SchedModel->getLatencyFactor()) { ZoneCritResIdx = 0; - DEBUG(dbgs() << " *** Critical resource NumMicroOps: " - << ScaledMOps / SchedModel->getLatencyFactor() << "c\n"); + LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: " + << ScaledMOps / SchedModel->getLatencyFactor() + << "c\n"); } } for (TargetSchedModel::ProcResIter @@ -2241,13 +2229,13 @@ void SchedBoundary::bumpNode(SUnit *SU) { unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency; if (SU->getDepth() > TopLatency) { TopLatency = SU->getDepth(); - DEBUG(dbgs() << " " << Available.getName() - << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n"); + LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU(" + << SU->NodeNum << ") " << TopLatency << "c\n"); } if (SU->getHeight() > BotLatency) { BotLatency = SU->getHeight(); - DEBUG(dbgs() << " " << Available.getName() - << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n"); + LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU(" + << SU->NodeNum << ") " << BotLatency << "c\n"); } // If we stall for any reason, bump the cycle. if (NextCycle > CurrCycle) @@ -2271,17 +2259,17 @@ void SchedBoundary::bumpNode(SUnit *SU) { // currCycle to X. if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) || (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) { - DEBUG(dbgs() << " Bump cycle to " - << (isTop() ? "end" : "begin") << " group\n"); + LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin") + << " group\n"); bumpCycle(++NextCycle); } while (CurrMOps >= SchedModel->getIssueWidth()) { - DEBUG(dbgs() << " *** Max MOps " << CurrMOps - << " at cycle " << CurrCycle << '\n'); + LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle " + << CurrCycle << '\n'); bumpCycle(++NextCycle); } - DEBUG(dumpScheduledState()); + LLVM_DEBUG(dumpScheduledState()); } /// Release pending ready nodes in to the available queue. 
This makes them @@ -2354,8 +2342,8 @@ SUnit *SchedBoundary::pickOnlyChoice() { releasePending(); } - DEBUG(Pending.dump()); - DEBUG(Available.dump()); + LLVM_DEBUG(Pending.dump()); + LLVM_DEBUG(Available.dump()); if (Available.size() == 1) return *Available.begin(); @@ -2453,27 +2441,24 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA, if (!OtherResLimited) { if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) { Policy.ReduceLatency |= true; - DEBUG(dbgs() << " " << CurrZone.Available.getName() - << " RemainingLatency " << RemLatency << " + " - << CurrZone.getCurrCycle() << "c > CritPath " - << Rem.CriticalPath << "\n"); + LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName() + << " RemainingLatency " << RemLatency << " + " + << CurrZone.getCurrCycle() << "c > CritPath " + << Rem.CriticalPath << "\n"); } } // If the same resource is limiting inside and outside the zone, do nothing. if (CurrZone.getZoneCritResIdx() == OtherCritIdx) return; - DEBUG( - if (CurrZone.isResourceLimited()) { - dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: " - << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) - << "\n"; - } - if (OtherResLimited) - dbgs() << " RemainingLimit: " - << SchedModel->getResourceName(OtherCritIdx) << "\n"; - if (!CurrZone.isResourceLimited() && !OtherResLimited) - dbgs() << " Latency limited both directions.\n"); + LLVM_DEBUG(if (CurrZone.isResourceLimited()) { + dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: " + << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n"; + } if (OtherResLimited) dbgs() + << " RemainingLimit: " + << SchedModel->getResourceName(OtherCritIdx) << "\n"; + if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs() + << " Latency limited both directions.\n"); if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx) Policy.ReduceResIdx = CurrZone.getZoneCritResIdx(); @@ -2560,11 +2545,12 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { } #endif +namespace llvm { /// Return true if this heuristic determines order. 
-static bool tryLess(int TryVal, int CandVal, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason) { +bool tryLess(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { if (TryVal < CandVal) { TryCand.Reason = Reason; return true; @@ -2577,10 +2563,10 @@ static bool tryLess(int TryVal, int CandVal, return false; } -static bool tryGreater(int TryVal, int CandVal, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason) { +bool tryGreater(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { if (TryVal > CandVal) { TryCand.Reason = Reason; return true; @@ -2593,9 +2579,9 @@ static bool tryGreater(int TryVal, int CandVal, return false; } -static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - SchedBoundary &Zone) { +bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + SchedBoundary &Zone) { if (Zone.isTop()) { if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), @@ -2617,10 +2603,11 @@ static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, } return false; } +} // end namespace llvm static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) { - DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") - << GenericSchedulerBase::getReasonStr(Reason) << '\n'); + LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? 
"Top " : "Bot ") + << GenericSchedulerBase::getReasonStr(Reason) << '\n'); } static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) { @@ -2742,14 +2729,14 @@ void GenericScheduler::checkAcyclicLatency() { Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; - DEBUG(dbgs() << "IssueCycles=" - << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " - << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() - << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount - << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() - << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; - if (Rem.IsAcyclicLatencyLimited) - dbgs() << " ACYCLIC LATENCY LIMIT\n"); + LLVM_DEBUG( + dbgs() << "IssueCycles=" + << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " + << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() + << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount + << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() + << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; + if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n"); } void GenericScheduler::registerRoots() { @@ -2760,7 +2747,7 @@ void GenericScheduler::registerRoots() { if (SU->getDepth() > Rem.CriticalPath) Rem.CriticalPath = SU->getDepth(); } - DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n'); + LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n'); if (DumpCriticalPathLength) { errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n"; } @@ -2771,13 +2758,14 @@ void GenericScheduler::registerRoots() { } } -static bool tryPressure(const PressureChange &TryP, - const PressureChange &CandP, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason, - const TargetRegisterInfo *TRI, - const MachineFunction &MF) { +namespace llvm { +bool tryPressure(const PressureChange &TryP, + const PressureChange &CandP, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason, + const TargetRegisterInfo *TRI, + const MachineFunction &MF) { // If one candidate decreases and the other increases, go with it. // Invalid candidates have UnitInc==0. if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, @@ -2810,7 +2798,7 @@ static bool tryPressure(const PressureChange &TryP, return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); } -static unsigned getWeakLeft(const SUnit *SU, bool isTop) { +unsigned getWeakLeft(const SUnit *SU, bool isTop) { return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; } @@ -2821,7 +2809,7 @@ static unsigned getWeakLeft(const SUnit *SU, bool isTop) { /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled /// with the operation that produces or consumes the physreg. We'll do this when /// regalloc has support for parallel copies. -static int biasPhysRegCopy(const SUnit *SU, bool isTop) { +int biasPhysRegCopy(const SUnit *SU, bool isTop) { const MachineInstr *MI = SU->getInstr(); if (!MI->isCopy()) return 0; @@ -2841,6 +2829,7 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) { return AtBoundary ? 
-1 : 1; return 0; } +} // end namespace llvm void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, @@ -2873,13 +2862,13 @@ void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU, } } } - DEBUG(if (Cand.RPDelta.Excess.isValid()) - dbgs() << " Try SU(" << Cand.SU->NodeNum << ") " - << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) - << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n"); + LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs() + << " Try SU(" << Cand.SU->NodeNum << ") " + << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":" + << Cand.RPDelta.Excess.getUnitInc() << "\n"); } -/// Apply a set of heursitics to a new candidate. Heuristics are currently +/// Apply a set of heuristics to a new candidate. Heuristics are currently /// hierarchical. This may be more efficient than a graduated cost model because /// we don't need to evaluate all aspects of the model for each node in the /// queue. But it's really done to make the heuristics easier to debug and @@ -2891,7 +2880,7 @@ void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU, // if Cand is from a different zone than TryCand. void GenericScheduler::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, - SchedBoundary *Zone) { + SchedBoundary *Zone) const { // Initialize the candidate if needed. if (!Cand.isValid()) { TryCand.Reason = NodeOrder; @@ -3017,7 +3006,7 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, if (TryCand.ResDelta == SchedResourceDelta()) TryCand.initResourceDelta(DAG, SchedModel); Cand.setBest(TryCand); - DEBUG(traceCandidate(Cand)); + LLVM_DEBUG(traceCandidate(Cand)); } } } @@ -3046,14 +3035,14 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot); // See if BotCand is still valid (because we previously scheduled from Top). - DEBUG(dbgs() << "Picking from Bot:\n"); + LLVM_DEBUG(dbgs() << "Picking from Bot:\n"); if (!BotCand.isValid() || BotCand.SU->isScheduled || BotCand.Policy != BotPolicy) { BotCand.reset(CandPolicy()); pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); assert(BotCand.Reason != NoCand && "failed to find the first candidate"); } else { - DEBUG(traceCandidate(BotCand)); + LLVM_DEBUG(traceCandidate(BotCand)); #ifndef NDEBUG if (VerifyScheduling) { SchedCandidate TCand; @@ -3066,14 +3055,14 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { } // Check if the top Q has a better candidate. 
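The hierarchical tie-breaking pattern behind tryLess/tryGreater and tryCandidate can be shown in isolation: each helper either decides the comparison and returns true, or defers to the next heuristic. In this sketch, Reason, Candidate, tryLessVal and pickBetter are simplified stand-ins, not the GenericScheduler types.

enum Reason { NoCand, RegExcess, Latency, NodeOrder };

struct Candidate {
  int Node = -1;
  Reason Why = NoCand;
  bool valid() const { return Node != -1; }
};

// Prefer the smaller value; record which heuristic made the decision.
static bool tryLessVal(int TryVal, int CandVal, Candidate &TryCand,
                       Candidate &Cand, Reason R) {
  if (TryVal < CandVal) {
    TryCand.Why = R;   // the challenger wins on this heuristic
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Why > R)
      Cand.Why = R;    // the incumbent wins; remember the strongest reason
    return true;
  }
  return false;        // tie: fall through to the next heuristic
}

// The caller chains the helpers and stops at the first one that decides.
static void pickBetter(Candidate &Cand, Candidate &TryCand, int TryExcess,
                       int CandExcess, int TryHeight, int CandHeight) {
  if (!Cand.valid()) {
    TryCand.Why = NodeOrder;
    Cand = TryCand;
    return;
  }
  if (tryLessVal(TryExcess, CandExcess, TryCand, Cand, RegExcess) ||
      tryLessVal(TryHeight, CandHeight, TryCand, Cand, Latency)) {
    if (TryCand.Why != NoCand)
      Cand = TryCand;  // replace only if the challenger actually won
    return;
  }
  // Every heuristic tied: keep the incumbent (original node order wins).
}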
- DEBUG(dbgs() << "Picking from Top:\n"); + LLVM_DEBUG(dbgs() << "Picking from Top:\n"); if (!TopCand.isValid() || TopCand.SU->isScheduled || TopCand.Policy != TopPolicy) { TopCand.reset(CandPolicy()); pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); assert(TopCand.Reason != NoCand && "failed to find the first candidate"); } else { - DEBUG(traceCandidate(TopCand)); + LLVM_DEBUG(traceCandidate(TopCand)); #ifndef NDEBUG if (VerifyScheduling) { SchedCandidate TCand; @@ -3093,7 +3082,7 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { tryCandidate(Cand, TopCand, nullptr); if (TopCand.Reason != NoCand) { Cand.setBest(TopCand); - DEBUG(traceCandidate(Cand)); + LLVM_DEBUG(traceCandidate(Cand)); } IsTopNode = Cand.AtTop; @@ -3142,7 +3131,8 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) { if (SU->isBottomReady()) Bot.removeReady(SU); - DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); + LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " + << *SU->getInstr()); return SU; } @@ -3163,8 +3153,8 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { MachineInstr *Copy = DepSU->getInstr(); if (!Copy->isCopy()) continue; - DEBUG(dbgs() << " Rescheduling physreg copy "; - Dep.getSUnit()->dump(DAG)); + LLVM_DEBUG(dbgs() << " Rescheduling physreg copy "; + Dep.getSUnit()->dump(DAG)); DAG->moveInstruction(Copy, InsertPos); } } @@ -3243,13 +3233,13 @@ void PostGenericScheduler::registerRoots() { if (SU->getDepth() > Rem.CriticalPath) Rem.CriticalPath = SU->getDepth(); } - DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n'); + LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n'); if (DumpCriticalPathLength) { errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n"; } } -/// Apply a set of heursitics to a new candidate for PostRA scheduling. +/// Apply a set of heuristics to a new candidate for PostRA scheduling. /// /// \param Cand provides the policy and current best candidate. /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. @@ -3301,7 +3291,7 @@ void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { tryCandidate(Cand, TryCand); if (TryCand.Reason != NoCand) { Cand.setBest(TryCand); - DEBUG(traceCandidate(Cand)); + LLVM_DEBUG(traceCandidate(Cand)); } } } @@ -3333,7 +3323,8 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { IsTopNode = true; Top.removeReady(SU); - DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); + LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " + << *SU->getInstr()); return SU; } @@ -3355,7 +3346,7 @@ ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) { namespace { -/// \brief Order nodes by the ILP metric. +/// Order nodes by the ILP metric. struct ILPOrder { const SchedDFSResult *DFSResult = nullptr; const BitVector *ScheduledTrees = nullptr; @@ -3363,7 +3354,7 @@ struct ILPOrder { ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {} - /// \brief Apply a less-than relation on node priority. + /// Apply a less-than relation on node priority. /// /// (Return true if A comes after B in the Q.) bool operator()(const SUnit *A, const SUnit *B) const { @@ -3388,7 +3379,7 @@ struct ILPOrder { } }; -/// \brief Schedule based on the ILP metric. +/// Schedule based on the ILP metric. 
class ILPScheduler : public MachineSchedStrategy { ScheduleDAGMILive *DAG = nullptr; ILPOrder Cmp; @@ -3422,16 +3413,19 @@ public: SUnit *SU = ReadyQ.back(); ReadyQ.pop_back(); IsTopNode = false; - DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") " - << " ILP: " << DAG->getDFSResult()->getILP(SU) - << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @" - << DAG->getDFSResult()->getSubtreeLevel( - DAG->getDFSResult()->getSubtreeID(SU)) << '\n' - << "Scheduling " << *SU->getInstr()); + LLVM_DEBUG(dbgs() << "Pick node " + << "SU(" << SU->NodeNum << ") " + << " ILP: " << DAG->getDFSResult()->getILP(SU) + << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) + << " @" + << DAG->getDFSResult()->getSubtreeLevel( + DAG->getDFSResult()->getSubtreeID(SU)) + << '\n' + << "Scheduling " << *SU->getInstr()); return SU; } - /// \brief Scheduler callback to notify that a new subtree is scheduled. + /// Scheduler callback to notify that a new subtree is scheduled. void scheduleTree(unsigned SubtreeID) override { std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); } diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp index bedfdd84b1ca..354f46e9e625 100644 --- a/contrib/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -77,6 +77,7 @@ static cl::opt<unsigned> SplitEdgeProbabilityThreshold( STATISTIC(NumSunk, "Number of machine instructions sunk"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); +STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); namespace { @@ -138,7 +139,7 @@ namespace { MachineBasicBlock *From, MachineBasicBlock *To); - /// \brief Postpone the splitting of the given critical + /// Postpone the splitting of the given critical /// edge (\p From, \p To). /// /// We do not split the edges on the fly. Indeed, this invalidates @@ -210,8 +211,8 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI, MachineInstr *DefMI = MRI->getVRegDef(SrcReg); if (DefMI->isCopyLike()) return false; - DEBUG(dbgs() << "Coalescing: " << *DefMI); - DEBUG(dbgs() << "*** to: " << MI); + LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI); + LLVM_DEBUG(dbgs() << "*** to: " << MI); MRI->replaceRegWith(DstReg, SrcReg); MI.eraseFromParent(); @@ -295,7 +296,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; - DEBUG(dbgs() << "******** Machine Sinking ********\n"); + LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n"); TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); @@ -322,14 +323,14 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { for (auto &Pair : ToSplit) { auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this); if (NewSucc != nullptr) { - DEBUG(dbgs() << " *** Splitting critical edge: " - << printMBBReference(*Pair.first) << " -- " - << printMBBReference(*NewSucc) << " -- " - << printMBBReference(*Pair.second) << '\n'); + LLVM_DEBUG(dbgs() << " *** Splitting critical edge: " + << printMBBReference(*Pair.first) << " -- " + << printMBBReference(*NewSucc) << " -- " + << printMBBReference(*Pair.second) << '\n'); MadeChange = true; ++NumSplit; } else - DEBUG(dbgs() << " *** Not legal to break critical edge\n"); + LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n"); } // If this iteration over the code changed anything, keep iterating. 
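To make the ILPOrder comparison shown earlier concrete, here is a reduced sketch of ordering a ready heap by subtree level and then by the ILP metric. ReadyNode and its int fields stand in for the SchedDFSResult queries, and the scheduled-subtree tie-break of the real comparator is omitted.

#include <algorithm>
#include <vector>

struct ReadyNode {
  int Id;
  int SubtreeLevel; // connection depth of the node's DFS subtree
  int ILP;          // ILP metric of the node within its subtree
};

struct ILPOrderSketch {
  bool MaximizeILP = true;
  // Returns true if A should come after B in the queue (B is picked first).
  bool operator()(const ReadyNode &A, const ReadyNode &B) const {
    if (A.SubtreeLevel != B.SubtreeLevel)
      return A.SubtreeLevel < B.SubtreeLevel; // shallower trees: lower priority
    return MaximizeILP ? A.ILP < B.ILP : A.ILP > B.ILP;
  }
};

// Pop the highest-priority node from a non-empty heap kept with the same
// comparator, the way a strategy's pickNode would.
static ReadyNode pickNext(std::vector<ReadyNode> &ReadyQ) {
  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), ILPOrderSketch{});
  ReadyNode Next = ReadyQ.back();
  ReadyQ.pop_back();
  return Next;
}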
if (!MadeChange) break; @@ -371,7 +372,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { if (!ProcessedBegin) --I; - if (MI.isDebugValue()) + if (MI.isDebugInstr()) continue; bool Joined = PerformTrivialForwardCoalescing(MI, &MBB); @@ -708,7 +709,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, return SuccToSinkTo; } -/// \brief Return true if MI is likely to be usable as a memory operation by the +/// Return true if MI is likely to be usable as a memory operation by the /// implicit null check optimization. /// /// This is a "best effort" heuristic, and should not be relied upon for @@ -752,6 +753,37 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI, MBP.LHS.getReg() == BaseReg; } +/// Sink an instruction and its associated debug instructions. +static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, + MachineBasicBlock::iterator InsertPos) { + // Collect matching debug values. + SmallVector<MachineInstr *, 2> DbgValuesToSink; + collectDebugValues(MI, DbgValuesToSink); + + // If we cannot find a location to use (merge with), then we erase the debug + // location to prevent debug-info driven tools from potentially reporting + // wrong location information. + if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end()) + MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(), + InsertPos->getDebugLoc())); + else + MI.setDebugLoc(DebugLoc()); + + // Move the instruction. + MachineBasicBlock *ParentBlock = MI.getParent(); + SuccToSinkTo.splice(InsertPos, ParentBlock, MI, + ++MachineBasicBlock::iterator(MI)); + + // Move previously adjacent debug value instructions to the insert position. + for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(), + DBE = DbgValuesToSink.end(); + DBI != DBE; ++DBI) { + MachineInstr *DbgMI = *DBI; + SuccToSinkTo.splice(InsertPos, ParentBlock, DbgMI, + ++MachineBasicBlock::iterator(DbgMI)); + } +} + /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, @@ -803,7 +835,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, return false; } - DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo); + LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo); // If the block has multiple predecessors, this is a critical edge. // Decide if we can sink along it or need to break the edge. @@ -813,26 +845,26 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, bool TryBreak = false; bool store = true; if (!MI.isSafeToMove(AA, store)) { - DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n"); + LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n"); TryBreak = true; } // We don't want to sink across a critical edge if we don't dominate the // successor. We could be introducing calculations to new code paths. if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) { - DEBUG(dbgs() << " *** NOTE: Critical edge found\n"); + LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n"); TryBreak = true; } // Don't sink instructions into a loop. if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) { - DEBUG(dbgs() << " *** NOTE: Loop header found\n"); + LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n"); TryBreak = true; } // Otherwise we are OK with sinking along a critical edge. 
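The critical-edge handling in SinkInstruction above reduces to a small decision: sink directly when the edge is not critical, otherwise ask for the edge to be split if the move is unsafe, non-dominating, or would land in a loop header. A condensed sketch; the booleans stand in for the real MachineInstr, MachineDominatorTree and MachineLoopInfo queries, and the real pass punts entirely when the edge cannot be split.

struct SinkQuery {
  bool SuccHasMultiplePreds; // the chosen successor makes this a critical edge
  bool SafeToMove;           // e.g. a load that must not cross other stores
  bool DominatesSucc;        // the current block dominates the destination
  bool SuccIsLoopHeader;     // sinking would push work into a loop
};

enum class SinkAction { SinkNow, SplitEdgeFirst };

static SinkAction classifySink(const SinkQuery &Q) {
  if (!Q.SuccHasMultiplePreds)
    return SinkAction::SinkNow;        // not a critical edge: just sink
  if (!Q.SafeToMove || !Q.DominatesSucc || Q.SuccIsLoopHeader)
    return SinkAction::SplitEdgeFirst; // postpone and request an edge split
  return SinkAction::SinkNow;          // safe to sink along the critical edge
}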
if (!TryBreak) - DEBUG(dbgs() << "Sinking along critical edge.\n"); + LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n"); else { // Mark this edge as to be split. // If the edge can actually be split, the next iteration of the main loop @@ -840,8 +872,8 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, bool Status = PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge); if (!Status) - DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " - "break critical edge\n"); + LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " + "break critical edge\n"); // The instruction will not be sunk this time. return false; } @@ -854,8 +886,8 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, bool Status = PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge); if (!Status) - DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " - "break critical edge\n"); + LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " + "break critical edge\n"); // The instruction will not be sunk this time. return false; } @@ -865,30 +897,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) ++InsertPos; - // collect matching debug values. - SmallVector<MachineInstr *, 2> DbgValuesToSink; - collectDebugValues(MI, DbgValuesToSink); - - // Merge or erase debug location to ensure consistent stepping in profilers - // and debuggers. - if (!SuccToSinkTo->empty() && InsertPos != SuccToSinkTo->end()) - MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(), - InsertPos->getDebugLoc())); - else - MI.setDebugLoc(DebugLoc()); - - - // Move the instruction. - SuccToSinkTo->splice(InsertPos, ParentBlock, MI, - ++MachineBasicBlock::iterator(MI)); - - // Move previously adjacent debug value instructions to the insert position. - for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(), - DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) { - MachineInstr *DbgMI = *DBI; - SuccToSinkTo->splice(InsertPos, ParentBlock, DbgMI, - ++MachineBasicBlock::iterator(DbgMI)); - } + performSink(MI, *SuccToSinkTo, InsertPos); // Conservatively, clear any kill flags, since it's possible that they are no // longer correct. @@ -902,3 +911,282 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, return true; } + +//===----------------------------------------------------------------------===// +// This pass is not intended to be a replacement or a complete alternative +// for the pre-ra machine sink pass. It is only designed to sink COPY +// instructions which should be handled after RA. +// +// This pass sinks COPY instructions into a successor block, if the COPY is not +// used in the current block and the COPY is live-in to a single successor +// (i.e., doesn't require the COPY to be duplicated). This avoids executing the +// copy on paths where their results aren't needed. This also exposes +// additional opportunites for dead copy elimination and shrink wrapping. +// +// These copies were either not handled by or are inserted after the MachineSink +// pass. As an example of the former case, the MachineSink pass cannot sink +// COPY instructions with allocatable source registers; for AArch64 these type +// of copy instructions are frequently used to move function parameters (PhyReg) +// into virtual registers in the entry block. 
+// +// For the machine IR below, this pass will sink %w19 in the entry into its +// successor (%bb.1) because %w19 is only live-in in %bb.1. +// %bb.0: +// %wzr = SUBSWri %w1, 1 +// %w19 = COPY %w0 +// Bcc 11, %bb.2 +// %bb.1: +// Live Ins: %w19 +// BL @fun +// %w0 = ADDWrr %w0, %w19 +// RET %w0 +// %bb.2: +// %w0 = COPY %wzr +// RET %w0 +// As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be +// able to see %bb.0 as a candidate. +//===----------------------------------------------------------------------===// +namespace { + +class PostRAMachineSinking : public MachineFunctionPass { +public: + bool runOnMachineFunction(MachineFunction &MF) override; + + static char ID; + PostRAMachineSinking() : MachineFunctionPass(ID) {} + StringRef getPassName() const override { return "PostRA Machine Sink"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + +private: + /// Track which register units have been modified and used. + LiveRegUnits ModifiedRegUnits, UsedRegUnits; + + /// Sink Copy instructions unused in the same block close to their uses in + /// successors. + bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF, + const TargetRegisterInfo *TRI, const TargetInstrInfo *TII); +}; +} // namespace + +char PostRAMachineSinking::ID = 0; +char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID; + +INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink", + "PostRA Machine Sink", false, false) + +static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, unsigned Reg, + const TargetRegisterInfo *TRI) { + LiveRegUnits LiveInRegUnits(*TRI); + LiveInRegUnits.addLiveIns(MBB); + return !LiveInRegUnits.available(Reg); +} + +static MachineBasicBlock * +getSingleLiveInSuccBB(MachineBasicBlock &CurBB, + const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs, + unsigned Reg, const TargetRegisterInfo *TRI) { + // Try to find a single sinkable successor in which Reg is live-in. + MachineBasicBlock *BB = nullptr; + for (auto *SI : SinkableBBs) { + if (aliasWithRegsInLiveIn(*SI, Reg, TRI)) { + // If BB is set here, Reg is live-in to at least two sinkable successors, + // so quit. + if (BB) + return nullptr; + BB = SI; + } + } + // Reg is not live-in to any sinkable successors. + if (!BB) + return nullptr; + + // Check if any register aliased with Reg is live-in in other successors. 
+ for (auto *SI : CurBB.successors()) { + if (!SinkableBBs.count(SI) && aliasWithRegsInLiveIn(*SI, Reg, TRI)) + return nullptr; + } + return BB; +} + +static MachineBasicBlock * +getSingleLiveInSuccBB(MachineBasicBlock &CurBB, + const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs, + ArrayRef<unsigned> DefedRegsInCopy, + const TargetRegisterInfo *TRI) { + MachineBasicBlock *SingleBB = nullptr; + for (auto DefReg : DefedRegsInCopy) { + MachineBasicBlock *BB = + getSingleLiveInSuccBB(CurBB, SinkableBBs, DefReg, TRI); + if (!BB || (SingleBB && SingleBB != BB)) + return nullptr; + SingleBB = BB; + } + return SingleBB; +} + +static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB, + SmallVectorImpl<unsigned> &UsedOpsInCopy, + LiveRegUnits &UsedRegUnits, + const TargetRegisterInfo *TRI) { + for (auto U : UsedOpsInCopy) { + MachineOperand &MO = MI->getOperand(U); + unsigned SrcReg = MO.getReg(); + if (!UsedRegUnits.available(SrcReg)) { + MachineBasicBlock::iterator NI = std::next(MI->getIterator()); + for (MachineInstr &UI : make_range(NI, CurBB.end())) { + if (UI.killsRegister(SrcReg, TRI)) { + UI.clearRegisterKills(SrcReg, TRI); + MO.setIsKill(true); + break; + } + } + } + } +} + +static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB, + SmallVectorImpl<unsigned> &UsedOpsInCopy, + SmallVectorImpl<unsigned> &DefedRegsInCopy) { + for (auto DefReg : DefedRegsInCopy) + SuccBB->removeLiveIn(DefReg); + for (auto U : UsedOpsInCopy) { + unsigned Reg = MI->getOperand(U).getReg(); + if (!SuccBB->isLiveIn(Reg)) + SuccBB->addLiveIn(Reg); + } +} + +static bool hasRegisterDependency(MachineInstr *MI, + SmallVectorImpl<unsigned> &UsedOpsInCopy, + SmallVectorImpl<unsigned> &DefedRegsInCopy, + LiveRegUnits &ModifiedRegUnits, + LiveRegUnits &UsedRegUnits) { + bool HasRegDependency = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) { + if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) { + HasRegDependency = true; + break; + } + DefedRegsInCopy.push_back(Reg); + + // FIXME: instead of isUse(), readsReg() would be a better fix here, + // For example, we can ignore modifications in reg with undef. However, + // it's not perfectly clear if skipping the internal read is safe in all + // other targets. + } else if (MO.isUse()) { + if (!ModifiedRegUnits.available(Reg)) { + HasRegDependency = true; + break; + } + UsedOpsInCopy.push_back(i); + } + } + return HasRegDependency; +} + +bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, + MachineFunction &MF, + const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII) { + SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs; + // FIXME: For now, we sink only to a successor which has a single predecessor + // so that we can directly sink COPY instructions to the successor without + // adding any new block or branch instruction. + for (MachineBasicBlock *SI : CurBB.successors()) + if (!SI->livein_empty() && SI->pred_size() == 1) + SinkableBBs.insert(SI); + + if (SinkableBBs.empty()) + return false; + + bool Changed = false; + + // Track which registers have been modified and used between the end of the + // block and the current instruction. 
+ ModifiedRegUnits.clear(); + UsedRegUnits.clear(); + + for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) { + MachineInstr *MI = &*I; + ++I; + + if (MI->isDebugInstr()) + continue; + + // Do not move any instruction across function call. + if (MI->isCall()) + return false; + + if (!MI->isCopy() || !MI->getOperand(0).isRenamable()) { + LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits, + TRI); + continue; + } + + // Track the operand index for use in Copy. + SmallVector<unsigned, 2> UsedOpsInCopy; + // Track the register number defed in Copy. + SmallVector<unsigned, 2> DefedRegsInCopy; + + // Don't sink the COPY if it would violate a register dependency. + if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy, + ModifiedRegUnits, UsedRegUnits)) { + LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits, + TRI); + continue; + } + assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) && + "Unexpect SrcReg or DefReg"); + MachineBasicBlock *SuccBB = + getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI); + // Don't sink if we cannot find a single sinkable successor in which Reg + // is live-in. + if (!SuccBB) { + LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits, + TRI); + continue; + } + assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) && + "Unexpected predecessor"); + + // Clear the kill flag if SrcReg is killed between MI and the end of the + // block. + clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); + MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI(); + performSink(*MI, *SuccBB, InsertPos); + updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); + + Changed = true; + ++NumPostRACopySink; + } + return Changed; +} + +bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + + ModifiedRegUnits.init(*TRI); + UsedRegUnits.init(*TRI); + for (auto &BB : MF) + Changed |= tryToSinkCopy(BB, MF, TRI, TII); + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp index d81c6f8a31e1..b444cd31eba2 100644 --- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -70,7 +70,7 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { TRI = ST.getRegisterInfo(); MRI = &MF->getRegInfo(); Loops = &getAnalysis<MachineLoopInfo>(); - SchedModel.init(ST.getSchedModel(), &ST, TII); + SchedModel.init(&ST); BlockInfo.resize(MF->getNumBlockIDs()); ProcResourceCycles.resize(MF->getNumBlockIDs() * SchedModel.getNumProcResourceKinds()); @@ -396,8 +396,8 @@ MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) { } void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB) - << '\n'); + LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB) + << '\n'); BlockInfo[MBB->getNumber()].invalidate(); for (unsigned i = 0; i != TS_NumStrategies; ++i) if (Ensembles[i]) @@ -477,8 +477,8 @@ public: /// Compute the trace through MBB. 
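For illustration: much of this change is the mechanical migration from the old DEBUG macro to LLVM_DEBUG (llvm/Support/Debug.h). Both forms require a DEBUG_TYPE tag and only print in builds with assertions when -debug or -debug-only is given. A self-contained sketch, with a hypothetical tag:

    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    #define DEBUG_TYPE "my-pass" // hypothetical; each pass defines its own tag

    static void dumpExample(unsigned Cycle) {
      // Single-statement form; compiled away entirely in NDEBUG builds.
      LLVM_DEBUG(llvm::dbgs() << "cycle " << Cycle << '\n');
      // Block form for multi-statement dumps.
      LLVM_DEBUG({
        llvm::dbgs() << "*** dump ***\n";
        llvm::dbgs() << "cycle " << Cycle << '\n';
      });
    }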
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Computing " << getName() << " trace through " - << printMBBReference(*MBB) << '\n'); + LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through " + << printMBBReference(*MBB) << '\n'); // Set up loop bounds for the backwards post-order traversal. LoopBounds Bounds(BlockInfo, MTM.Loops); @@ -486,11 +486,11 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { Bounds.Downward = false; Bounds.Visited.clear(); for (auto I : inverse_post_order_ext(MBB, Bounds)) { - DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": "); + LLVM_DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the predecessors have been visited, pick the preferred one. TBI.Pred = pickTracePred(I); - DEBUG({ + LLVM_DEBUG({ if (TBI.Pred) dbgs() << printMBBReference(*TBI.Pred) << '\n'; else @@ -504,11 +504,11 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { Bounds.Downward = true; Bounds.Visited.clear(); for (auto I : post_order_ext(MBB, Bounds)) { - DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": "); + LLVM_DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the successors have been visited, pick the preferred one. TBI.Succ = pickTraceSucc(I); - DEBUG({ + LLVM_DEBUG({ if (TBI.Succ) dbgs() << printMBBReference(*TBI.Succ) << '\n'; else @@ -531,8 +531,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { WorkList.push_back(BadMBB); do { const MachineBasicBlock *MBB = WorkList.pop_back_val(); - DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' ' - << getName() << " height.\n"); + LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' ' + << getName() << " height.\n"); // Find any MBB predecessors that have MBB as their preferred successor. // They are the only ones that need to be invalidated. for (const MachineBasicBlock *Pred : MBB->predecessors()) { @@ -556,8 +556,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { WorkList.push_back(BadMBB); do { const MachineBasicBlock *MBB = WorkList.pop_back_val(); - DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' ' - << getName() << " depth.\n"); + LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' ' + << getName() << " depth.\n"); // Find any MBB successors that have MBB as their preferred predecessor. // They are the only ones that need to be invalidated. for (const MachineBasicBlock *Succ : MBB->successors()) { @@ -653,7 +653,7 @@ static bool getDataDeps(const MachineInstr &UseMI, SmallVectorImpl<DataDep> &Deps, const MachineRegisterInfo *MRI) { // Debug values should not be included in any calculations. - if (UseMI.isDebugValue()) + if (UseMI.isDebugInstr()) return false; bool HasPhysRegs = false; @@ -813,9 +813,9 @@ updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI, if (TBI.HasValidInstrHeights) { // Update critical path length. 
TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height); - DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI); + LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI); } else { - DEBUG(dbgs() << Cycle << '\t' << UseMI); + LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI); } } @@ -860,13 +860,13 @@ computeInstrDepths(const MachineBasicBlock *MBB) { // Go through trace blocks in top-down order, stopping after the center block. while (!Stack.empty()) { MBB = Stack.pop_back_val(); - DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n"); + LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n"); TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; TBI.HasValidInstrDepths = true; TBI.CriticalPath = 0; // Print out resource depths here as well. - DEBUG({ + LLVM_DEBUG({ dbgs() << format("%7u Instructions\n", TBI.InstrDepth); ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber()); for (unsigned K = 0; K != PRDepths.size(); ++K) @@ -1045,12 +1045,12 @@ computeInstrHeights(const MachineBasicBlock *MBB) { SmallVector<DataDep, 8> Deps; for (;!Stack.empty(); Stack.pop_back()) { MBB = Stack.back(); - DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n"); + LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n"); TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; TBI.HasValidInstrHeights = true; TBI.CriticalPath = 0; - DEBUG({ + LLVM_DEBUG({ dbgs() << format("%7u Instructions\n", TBI.InstrHeight); ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber()); for (unsigned K = 0; K != PRHeights.size(); ++K) @@ -1081,7 +1081,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) { if (!Deps.empty()) { // Loop header PHI heights are all 0. unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0; - DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI); + LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI); if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel, MTM.TII)) addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack); @@ -1122,38 +1122,38 @@ computeInstrHeights(const MachineBasicBlock *MBB) { InstrCycles &MICycles = Cycles[&MI]; MICycles.Height = Cycle; if (!TBI.HasValidInstrDepths) { - DEBUG(dbgs() << Cycle << '\t' << MI); + LLVM_DEBUG(dbgs() << Cycle << '\t' << MI); continue; } // Update critical path length. TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth); - DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI); + LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI); } // Update virtual live-in heights. They were added by addLiveIns() with a 0 // height because the final height isn't known until now. - DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:"); + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:"); for (LiveInReg &LIR : TBI.LiveIns) { const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); LIR.Height = Heights.lookup(DefMI); - DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height); + LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height); } // Transfer the live regunits to the live-in list. 
for (SparseSet<LiveRegUnit>::const_iterator RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) { TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle)); - DEBUG(dbgs() << ' ' << printRegUnit(RI->RegUnit, MTM.TRI) - << '@' << RI->Cycle); + LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RI->RegUnit, MTM.TRI) << '@' + << RI->Cycle); } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); if (!TBI.HasValidInstrDepths) continue; // Add live-ins to the critical path length. TBI.CriticalPath = std::max(TBI.CriticalPath, computeCrossBlockCriticalPath(TBI)); - DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n'); + LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n'); } } diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index e0cc2ca9a2a2..d644e41abc5b 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -239,7 +239,8 @@ namespace { void report(const char *msg, const MachineFunction *MF); void report(const char *msg, const MachineBasicBlock *MBB); void report(const char *msg, const MachineInstr *MI); - void report(const char *msg, const MachineOperand *MO, unsigned MONum); + void report(const char *msg, const MachineOperand *MO, unsigned MONum, + LLT MOVRegType = LLT{}); void report_context(const LiveInterval &LI) const; void report_context(const LiveRange &LR, unsigned VRegUnit, @@ -250,16 +251,16 @@ namespace { void report_context_liverange(const LiveRange &LR) const; void report_context_lanemask(LaneBitmask LaneMask) const; void report_context_vreg(unsigned VReg) const; - void report_context_vreg_regunit(unsigned VRegOrRegUnit) const; + void report_context_vreg_regunit(unsigned VRegOrUnit) const; void verifyInlineAsm(const MachineInstr *MI); void checkLiveness(const MachineOperand *MO, unsigned MONum); void checkLivenessAtUse(const MachineOperand *MO, unsigned MONum, - SlotIndex UseIdx, const LiveRange &LR, unsigned Reg, + SlotIndex UseIdx, const LiveRange &LR, unsigned VRegOrUnit, LaneBitmask LaneMask = LaneBitmask::getNone()); void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum, - SlotIndex DefIdx, const LiveRange &LR, unsigned Reg, + SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit, LaneBitmask LaneMask = LaneBitmask::getNone()); void markReachable(const MachineBasicBlock *MBB); @@ -359,11 +360,15 @@ unsigned MachineVerifier::verify(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); - isFunctionRegBankSelected = MF.getProperties().hasProperty( - MachineFunctionProperties::Property::RegBankSelected); - isFunctionSelected = MF.getProperties().hasProperty( - MachineFunctionProperties::Property::Selected); - + const bool isFunctionFailedISel = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel); + isFunctionRegBankSelected = + !isFunctionFailedISel && + MF.getProperties().hasProperty( + MachineFunctionProperties::Property::RegBankSelected); + isFunctionSelected = !isFunctionFailedISel && + MF.getProperties().hasProperty( + MachineFunctionProperties::Property::Selected); LiveVars = nullptr; LiveInts = nullptr; LiveStks = nullptr; @@ -486,15 +491,14 @@ void MachineVerifier::report(const char *msg, const MachineInstr *MI) { if (Indexes && Indexes->hasIndex(*MI)) errs() << Indexes->getInstructionIndex(*MI) << '\t'; MI->print(errs(), /*SkipOpers=*/true); - errs() << '\n'; } -void MachineVerifier::report(const char *msg, - const MachineOperand *MO, unsigned MONum) { 
+void MachineVerifier::report(const char *msg, const MachineOperand *MO, + unsigned MONum, LLT MOVRegType) { assert(MO); report(msg, MO->getParent()); errs() << "- operand " << MONum << ": "; - MO->print(errs(), TRI); + MO->print(errs(), MOVRegType, TRI); errs() << "\n"; } @@ -642,7 +646,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { !(AsmInfo && AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj && BB && isa<SwitchInst>(BB->getTerminator())) && - !isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) + !isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) report("MBB has more than one landing pad successor", MBB); // Call AnalyzeBranch. If it succeeds, there several more conditions to check. @@ -873,11 +877,11 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { if (MI->getNumOperands() < MCID.getNumOperands()) { report("Too few operands", MI); errs() << MCID.getNumOperands() << " operands expected, but " - << MI->getNumOperands() << " given.\n"; + << MI->getNumOperands() << " given.\n"; } if (MI->isPHI() && MF->getProperties().hasProperty( - MachineFunctionProperties::Property::NoPHIs)) + MachineFunctionProperties::Property::NoPHIs)) report("Found PHI instruction with NoPHIs property set", MI); // Check the tied operands. @@ -886,7 +890,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { // Check the MachineMemOperands for basic consistency. for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), - E = MI->memoperands_end(); I != E; ++I) { + E = MI->memoperands_end(); + I != E; ++I) { if ((*I)->isLoad() && !MI->mayLoad()) report("Missing mayLoad flag", MI); if ((*I)->isStore() && !MI->mayStore()) @@ -897,7 +902,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { // Other instructions must have one, unless they are inside a bundle. if (LiveInts) { bool mapped = !LiveInts->isNotInMIMap(*MI); - if (MI->isDebugValue()) { + if (MI->isDebugInstr()) { if (mapped) report("Debug instruction has a slot index", MI); } else if (MI->isInsideBundle()) { @@ -909,32 +914,42 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } } - // Check types. if (isPreISelGenericOpcode(MCID.getOpcode())) { if (isFunctionSelected) report("Unexpected generic instruction in a Selected function", MI); - // Generic instructions specify equality constraints between some - // of their operands. Make sure these are consistent. + // Check types. SmallVector<LLT, 4> Types; - for (unsigned i = 0; i < MCID.getNumOperands(); ++i) { - if (!MCID.OpInfo[i].isGenericType()) + for (unsigned I = 0; I < MCID.getNumOperands(); ++I) { + if (!MCID.OpInfo[I].isGenericType()) continue; - size_t TypeIdx = MCID.OpInfo[i].getGenericTypeIndex(); + // Generic instructions specify type equality constraints between some of + // their operands. Make sure these are consistent. 
+ size_t TypeIdx = MCID.OpInfo[I].getGenericTypeIndex(); Types.resize(std::max(TypeIdx + 1, Types.size())); - LLT OpTy = MRI->getType(MI->getOperand(i).getReg()); - if (Types[TypeIdx].isValid() && Types[TypeIdx] != OpTy) - report("type mismatch in generic instruction", MI); - Types[TypeIdx] = OpTy; + const MachineOperand *MO = &MI->getOperand(I); + LLT OpTy = MRI->getType(MO->getReg()); + // Don't report a type mismatch if there is no actual mismatch, only a + // type missing, to reduce noise: + if (OpTy.isValid()) { + // Only the first valid type for a type index will be printed: don't + // overwrite it later so it's always clear which type was expected: + if (!Types[TypeIdx].isValid()) + Types[TypeIdx] = OpTy; + else if (Types[TypeIdx] != OpTy) + report("Type mismatch in generic instruction", MO, I, OpTy); + } else { + // Generic instructions must have types attached to their operands. + report("Generic instruction is missing a virtual register type", MO, I); + } } - } - // Generic opcodes must not have physical register operands. - if (isPreISelGenericOpcode(MCID.getOpcode())) { - for (auto &Op : MI->operands()) { - if (Op.isReg() && TargetRegisterInfo::isPhysicalRegister(Op.getReg())) - report("Generic instruction cannot have physical register", MI); + // Generic opcodes must not have physical register operands. + for (unsigned I = 0; I < MI->getNumOperands(); ++I) { + const MachineOperand *MO = &MI->getOperand(I); + if (MO->isReg() && TargetRegisterInfo::isPhysicalRegister(MO->getReg())) + report("Generic instruction cannot have physical register", MO, I); } } @@ -971,6 +986,88 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { MI); break; } + case TargetOpcode::G_SEXT: + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_TRUNC: + case TargetOpcode::G_FPEXT: + case TargetOpcode::G_FPTRUNC: { + // Number of operands and presense of types is already checked (and + // reported in case of any issues), so no need to report them again. As + // we're trying to report as many issues as possible at once, however, the + // instructions aren't guaranteed to have the right number of operands or + // types attached to them at this point + assert(MCID.getNumOperands() == 2 && "Expected 2 operands G_*{EXT,TRUNC}"); + if (MI->getNumOperands() < MCID.getNumOperands()) + break; + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + if (!DstTy.isValid() || !SrcTy.isValid()) + break; + + LLT DstElTy = DstTy.isVector() ? DstTy.getElementType() : DstTy; + LLT SrcElTy = SrcTy.isVector() ? SrcTy.getElementType() : SrcTy; + if (DstElTy.isPointer() || SrcElTy.isPointer()) + report("Generic extend/truncate can not operate on pointers", MI); + + if (DstTy.isVector() != SrcTy.isVector()) { + report("Generic extend/truncate must be all-vector or all-scalar", MI); + // Generally we try to report as many issues as possible at once, but in + // this case it's not clear what should we be comparing the size of the + // scalar with: the size of the whole vector or its lane. Instead of + // making an arbitrary choice and emitting not so helpful message, let's + // avoid the extra noise and stop here. 
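For illustration: the new extend/truncate checks compare operand types through LLT. The sketch below shows which property each rule inspects, assuming the LLT interface used elsewhere in this change (llvm/Support/LowLevelTypeImpl.h); the concrete sizes and lane counts are chosen only for the example.

    #include "llvm/Support/LowLevelTypeImpl.h"
    using llvm::LLT;

    static bool extendPairsLookLegal() {
      LLT S32 = LLT::scalar(32), S64 = LLT::scalar(64);
      LLT V4S32 = LLT::vector(4, 32), V4S64 = LLT::vector(4, 64);

      // G_ZEXT s64 <- s32: destination strictly wider than source.
      bool ScalarOK = S64.getSizeInBits() > S32.getSizeInBits();
      // G_ZEXT v4s64 <- v4s32: both vectors, same lane count, wider lanes.
      bool VectorOK = V4S64.isVector() && V4S32.isVector() &&
                      V4S64.getNumElements() == V4S32.getNumElements() &&
                      V4S64.getElementType().getSizeInBits() >
                          V4S32.getElementType().getSizeInBits();
      // A mixed form such as G_ZEXT v4s64 <- s32 is what the new
      // "all-vector or all-scalar" check rejects.
      return ScalarOK && VectorOK;
    }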
+ break; + } + if (DstTy.isVector() && DstTy.getNumElements() != SrcTy.getNumElements()) + report("Generic vector extend/truncate must preserve number of lanes", + MI); + unsigned DstSize = DstElTy.getSizeInBits(); + unsigned SrcSize = SrcElTy.getSizeInBits(); + switch (MI->getOpcode()) { + default: + if (DstSize <= SrcSize) + report("Generic extend has destination type no larger than source", MI); + break; + case TargetOpcode::G_TRUNC: + case TargetOpcode::G_FPTRUNC: + if (DstSize >= SrcSize) + report("Generic truncate has destination type no smaller than source", + MI); + break; + } + break; + } + case TargetOpcode::COPY: { + if (foundErrors) + break; + const MachineOperand &DstOp = MI->getOperand(0); + const MachineOperand &SrcOp = MI->getOperand(1); + LLT DstTy = MRI->getType(DstOp.getReg()); + LLT SrcTy = MRI->getType(SrcOp.getReg()); + if (SrcTy.isValid() && DstTy.isValid()) { + // If both types are valid, check that the types are the same. + if (SrcTy != DstTy) { + report("Copy Instruction is illegal with mismatching types", MI); + errs() << "Def = " << DstTy << ", Src = " << SrcTy << "\n"; + } + } + if (SrcTy.isValid() || DstTy.isValid()) { + // If one of them have valid types, let's just check they have the same + // size. + unsigned SrcSize = TRI->getRegSizeInBits(SrcOp.getReg(), *MRI); + unsigned DstSize = TRI->getRegSizeInBits(DstOp.getReg(), *MRI); + assert(SrcSize && "Expecting size here"); + assert(DstSize && "Expecting size here"); + if (SrcSize != DstSize) + if (!DstOp.getSubReg() && !SrcOp.getSubReg()) { + report("Copy Instruction is illegal with mismatching sizes", MI); + errs() << "Def Size = " << DstSize << ", Src Size = " << SrcSize + << "\n"; + } + } + break; + } case TargetOpcode::STATEPOINT: if (!MI->getOperand(StatepointOpers::IDPos).isImm() || !MI->getOperand(StatepointOpers::NBytesPos).isImm() || @@ -1101,12 +1198,14 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } } } - if (MO->isRenamable() && - ((MO->isDef() && MI->hasExtraDefRegAllocReq()) || - (MO->isUse() && MI->hasExtraSrcRegAllocReq()))) { - report("Illegal isRenamable setting for opcode with extra regalloc " - "requirements", - MO, MONum); + if (MO->isRenamable()) { + if (MRI->isReserved(Reg)) { + report("isRenamable set on reserved register", MO, MONum); + return; + } + } + if (MI->isDebugValue() && MO->isUse() && !MO->isDebug()) { + report("Use-reg is not IsDebug in a DBG_VALUE", MO, MONum); return; } } else { diff --git a/contrib/llvm/lib/CodeGen/MacroFusion.cpp b/contrib/llvm/lib/CodeGen/MacroFusion.cpp index e7f426c469a0..62dadbba0c1a 100644 --- a/contrib/llvm/lib/CodeGen/MacroFusion.cpp +++ b/contrib/llvm/lib/CodeGen/MacroFusion.cpp @@ -66,11 +66,11 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU, if (SI.getSUnit() == &FirstSU) SI.setLatency(0); - DEBUG(dbgs() << "Macro fuse: "; - FirstSU.print(dbgs(), &DAG); dbgs() << " - "; - SecondSU.print(dbgs(), &DAG); dbgs() << " / "; - dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " << - DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n'; ); + LLVM_DEBUG( + dbgs() << "Macro fuse: "; FirstSU.print(dbgs(), &DAG); dbgs() << " - "; + SecondSU.print(dbgs(), &DAG); dbgs() << " / "; + dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " + << DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n';); // Make data dependencies from the FirstSU also dependent on the SecondSU to // prevent them from being scheduled between the FirstSU and the SecondSU. 
@@ -80,24 +80,32 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU, if (SI.isWeak() || isHazard(SI) || SU == &DAG.ExitSU || SU == &SecondSU || SU->isPred(&SecondSU)) continue; - DEBUG(dbgs() << " Bind "; - SecondSU.print(dbgs(), &DAG); dbgs() << " - "; - SU->print(dbgs(), &DAG); dbgs() << '\n';); + LLVM_DEBUG(dbgs() << " Bind "; SecondSU.print(dbgs(), &DAG); + dbgs() << " - "; SU->print(dbgs(), &DAG); dbgs() << '\n';); DAG.addEdge(SU, SDep(&SecondSU, SDep::Artificial)); } // Make the FirstSU also dependent on the dependencies of the SecondSU to // prevent them from being scheduled between the FirstSU and the SecondSU. - if (&FirstSU != &DAG.EntrySU) + if (&FirstSU != &DAG.EntrySU) { for (const SDep &SI : SecondSU.Preds) { SUnit *SU = SI.getSUnit(); if (SI.isWeak() || isHazard(SI) || &FirstSU == SU || FirstSU.isSucc(SU)) continue; - DEBUG(dbgs() << " Bind "; - SU->print(dbgs(), &DAG); dbgs() << " - "; - FirstSU.print(dbgs(), &DAG); dbgs() << '\n';); + LLVM_DEBUG(dbgs() << " Bind "; SU->print(dbgs(), &DAG); dbgs() << " - "; + FirstSU.print(dbgs(), &DAG); dbgs() << '\n';); DAG.addEdge(&FirstSU, SDep(SU, SDep::Artificial)); } + // ExitSU comes last by design, which acts like an implicit dependency + // between ExitSU and any bottom root in the graph. We should transfer + // this to FirstSU as well. + if (&SecondSU == &DAG.ExitSU) { + for (SUnit &SU : DAG.SUnits) { + if (SU.Succs.empty()) + DAG.addEdge(&FirstSU, SDep(&SU, SDep::Artificial)); + } + } + } ++NumFused; return true; @@ -105,7 +113,7 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU, namespace { -/// \brief Post-process the DAG to create cluster edges between instrs that may +/// Post-process the DAG to create cluster edges between instrs that may /// be fused by the processor into a single operation. class MacroFusion : public ScheduleDAGMutation { ShouldSchedulePredTy shouldScheduleAdjacent; @@ -135,7 +143,7 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { scheduleAdjacentImpl(*DAG, DAG->ExitSU); } -/// \brief Implement the fusion of instr pairs in the scheduling DAG, +/// Implement the fusion of instr pairs in the scheduling DAG, /// anchored at the instr in AnchorSU.. bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) { const MachineInstr &AnchorMI = *AnchorSU.getInstr(); diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp index 8972867ba083..befa8422d399 100644 --- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp +++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -45,7 +45,7 @@ namespace { initializeOptimizePHIsPass(*PassRegistry::getPassRegistry()); } - bool runOnMachineFunction(MachineFunction &MF) override; + bool runOnMachineFunction(MachineFunction &Fn) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp index 54c5a940275d..7a5c20000066 100644 --- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp @@ -75,7 +75,7 @@ namespace { initializePHIEliminationPass(*PassRegistry::getPassRegistry()); } - bool runOnMachineFunction(MachineFunction &Fn) override; + bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override; private: @@ -91,7 +91,7 @@ namespace { /// register which is used in a PHI node. We map that to the BB the /// vreg is coming from. 
This is used later to determine when the vreg /// is killed in the BB. - void analyzePHINodes(const MachineFunction& Fn); + void analyzePHINodes(const MachineFunction& MF); /// Split critical edges where necessary for good coalescer performance. bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, @@ -270,7 +270,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, IncomingReg = entry; reusedIncoming = true; ++NumReused; - DEBUG(dbgs() << "Reusing " << printReg(IncomingReg) << " for " << *MPhi); + LLVM_DEBUG(dbgs() << "Reusing " << printReg(IncomingReg) << " for " + << *MPhi); } else { const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); @@ -295,9 +296,9 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // AfterPHIsIt, so it appears before the current PHICopy. if (reusedIncoming) if (MachineInstr *OldKill = VI.findKill(&MBB)) { - DEBUG(dbgs() << "Remove old kill from " << *OldKill); + LLVM_DEBUG(dbgs() << "Remove old kill from " << *OldKill); LV->removeVirtualRegisterKilled(IncomingReg, *OldKill); - DEBUG(MBB.dump()); + LLVM_DEBUG(MBB.dump()); } // Add information to LiveVariables to know that the incoming value is @@ -452,7 +453,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, KillInst = FirstTerm; while (KillInst != opBlock.begin()) { --KillInst; - if (KillInst->isDebugValue()) + if (KillInst->isDebugInstr()) continue; if (KillInst->readsRegister(SrcReg)) break; @@ -512,7 +513,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, KillInst = FirstTerm; while (KillInst != opBlock.begin()) { --KillInst; - if (KillInst->isDebugValue()) + if (KillInst->isDebugInstr()) continue; if (KillInst->readsRegister(SrcReg)) break; @@ -593,9 +594,9 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, if (!ShouldSplit && !NoPhiElimLiveOutEarlyExit) continue; if (ShouldSplit) { - DEBUG(dbgs() << printReg(Reg) << " live-out before critical edge " - << printMBBReference(*PreMBB) << " -> " - << printMBBReference(MBB) << ": " << *BBI); + LLVM_DEBUG(dbgs() << printReg(Reg) << " live-out before critical edge " + << printMBBReference(*PreMBB) << " -> " + << printMBBReference(MBB) << ": " << *BBI); } // If Reg is not live-in to MBB, it means it must be live-in to some @@ -610,10 +611,12 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // Check for a loop exiting edge. 
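For illustration: several hunks in this change replace isDebugValue() with isDebugInstr() so that the new DBG_LABEL instruction is skipped alongside DBG_VALUE. The backward kill search in LowerPHINode is typical; restated below with the same names it uses (FirstTerm, opBlock, SrcReg):

    MachineBasicBlock::iterator KillInst = FirstTerm;
    while (KillInst != opBlock.begin()) {
      --KillInst;
      if (KillInst->isDebugInstr()) // previously isDebugValue(); now also
        continue;                   // skips DBG_LABEL
      if (KillInst->readsRegister(SrcReg))
        break;
    }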
if (!ShouldSplit && CurLoop != PreLoop) { - DEBUG({ + LLVM_DEBUG({ dbgs() << "Split wouldn't help, maybe avoid loop copies?\n"; - if (PreLoop) dbgs() << "PreLoop: " << *PreLoop; - if (CurLoop) dbgs() << "CurLoop: " << *CurLoop; + if (PreLoop) + dbgs() << "PreLoop: " << *PreLoop; + if (CurLoop) + dbgs() << "CurLoop: " << *CurLoop; }); // This edge could be entering a loop, exiting a loop, or it could be // both: Jumping directly form one loop to the header of a sibling @@ -624,7 +627,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, if (!ShouldSplit && !SplitAllCriticalEdges) continue; if (!PreMBB->SplitCriticalEdge(&MBB, *this)) { - DEBUG(dbgs() << "Failed to split critical edge.\n"); + LLVM_DEBUG(dbgs() << "Failed to split critical edge.\n"); continue; } Changed = true; diff --git a/contrib/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm/lib/CodeGen/ParallelCG.cpp index ff8680a0540d..bc3f2a6e9b5a 100644 --- a/contrib/llvm/lib/CodeGen/ParallelCG.cpp +++ b/contrib/llvm/lib/CodeGen/ParallelCG.cpp @@ -30,7 +30,7 @@ static void codegen(Module *M, llvm::raw_pwrite_stream &OS, TargetMachine::CodeGenFileType FileType) { std::unique_ptr<TargetMachine> TM = TMFactory(); legacy::PassManager CodeGenPasses; - if (TM->addPassesToEmitFile(CodeGenPasses, OS, FileType)) + if (TM->addPassesToEmitFile(CodeGenPasses, OS, nullptr, FileType)) report_fatal_error("Failed to setup codegen"); CodeGenPasses.run(*M); } @@ -44,7 +44,7 @@ std::unique_ptr<Module> llvm::splitCodeGen( if (OSs.size() == 1) { if (!BCOSs.empty()) - WriteBitcodeToFile(M.get(), *BCOSs[0]); + WriteBitcodeToFile(*M, *BCOSs[0]); codegen(M.get(), *OSs[0], TMFactory, FileType); return M; } @@ -66,7 +66,7 @@ std::unique_ptr<Module> llvm::splitCodeGen( // FIXME: Provide a more direct way to do this in LLVM. SmallString<0> BC; raw_svector_ostream BCOS(BC); - WriteBitcodeToFile(MPart.get(), BCOS); + WriteBitcodeToFile(*MPart, BCOS); if (!BCOSs.empty()) { BCOSs[ThreadCount]->write(BC.begin(), BC.size()); diff --git a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp index 0957705b19bb..afb4b0a7e174 100644 --- a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp +++ b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp @@ -49,6 +49,7 @@ static bool doesNotGeneratecode(const MachineInstr &MI) { case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: case TargetOpcode::DBG_VALUE: + case TargetOpcode::DBG_LABEL: return true; } } diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 1320f9985553..1d058ccfb633 100644 --- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -202,7 +202,7 @@ namespace { bool foldImmediate(MachineInstr &MI, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs); - /// \brief Finds recurrence cycles, but only ones that formulated around + /// Finds recurrence cycles, but only ones that formulated around /// a def operand and a use operand that are tied. If there is a use /// operand commutable with the tied use operand, find recurrence cycle /// along that operand as well. @@ -210,7 +210,7 @@ namespace { const SmallSet<unsigned, 2> &TargetReg, RecurrenceCycle &RC); - /// \brief If copy instruction \p MI is a virtual register copy, track it in + /// If copy instruction \p MI is a virtual register copy, track it in /// the set \p CopySrcRegs and \p CopyMIs. 
If this virtual register was /// previously seen as a copy, replace the uses of this copy with the /// previously seen copy's destination register. @@ -221,7 +221,7 @@ namespace { /// Is the register \p Reg a non-allocatable physical register? bool isNAPhysCopy(unsigned Reg); - /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical + /// If copy instruction \p MI is a non-allocatable virtual<->physical /// register copy, track it in the \p NAPhysToVirtMIs map. If this /// non-allocatable physical register was previously copied to a virtual /// registered and hasn't been clobbered, the virt->phys copy can be @@ -232,7 +232,7 @@ namespace { bool isLoadFoldable(MachineInstr &MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); - /// \brief Check whether \p MI is understood by the register coalescer + /// Check whether \p MI is understood by the register coalescer /// but may require some rewriting. bool isCoalescableCopy(const MachineInstr &MI) { // SubregToRegs are not interesting, because they are already register @@ -242,7 +242,7 @@ namespace { MI.isExtractSubreg())); } - /// \brief Check whether \p MI is a copy like instruction that is + /// Check whether \p MI is a copy like instruction that is /// not recognized by the register coalescer. bool isUncoalescableCopy(const MachineInstr &MI) { return MI.isBitcast() || @@ -345,7 +345,7 @@ namespace { } }; - /// \brief Helper class to track the possible sources of a value defined by + /// Helper class to track the possible sources of a value defined by /// a (chain of) copy related instructions. /// Given a definition (instruction and definition index), this class /// follows the use-def chain to find successive suitable sources. @@ -425,7 +425,7 @@ namespace { } } - /// \brief Following the use-def chain, get the next available source + /// Following the use-def chain, get the next available source /// for the tracked value. /// \return A ValueTrackerResult containing a set of registers /// and sub registers with tracked values. A ValueTrackerResult with @@ -646,7 +646,7 @@ bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) { return TII->optimizeCondBranch(MI); } -/// \brief Try to find the next source that share the same register file +/// Try to find the next source that share the same register file /// for the value defined by \p Reg and \p SubReg. /// When true is returned, the \p RewriteMap can be used by the client to /// retrieve all Def -> Use along the way up to the next source. Any found @@ -696,7 +696,8 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, // An existent entry with multiple sources is a PHI cycle we must avoid. // Otherwise it's an entry with a valid next source we already found. if (CurSrcRes.getNumSources() > 1) { - DEBUG(dbgs() << "findNextSource: found PHI cycle, aborting...\n"); + LLVM_DEBUG(dbgs() + << "findNextSource: found PHI cycle, aborting...\n"); return false; } break; @@ -709,7 +710,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, if (NumSrcs > 1) { PHICount++; if (PHICount >= RewritePHILimit) { - DEBUG(dbgs() << "findNextSource: PHI limit reached\n"); + LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n"); return false; } @@ -746,7 +747,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, return CurSrcPair.Reg != Reg; } -/// \brief Insert a PHI instruction with incoming edges \p SrcRegs that are +/// Insert a PHI instruction with incoming edges \p SrcRegs that are /// guaranteed to have the same register class. 
This is necessary whenever we /// successfully traverse a PHI instruction and find suitable sources coming /// from its edges. By inserting a new PHI, we provide a rewritten PHI def @@ -791,7 +792,7 @@ public: Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {} virtual ~Rewriter() {} - /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and + /// Get the next rewritable source (SrcReg, SrcSubReg) and /// the related value that it affects (DstReg, DstSubReg). /// A source is considered rewritable if its register class and the /// register class of the related DstReg may not be register @@ -859,7 +860,7 @@ public: } }; -/// \brief Helper class to rewrite uncoalescable copy like instructions +/// Helper class to rewrite uncoalescable copy like instructions /// into new COPY (coalescable friendly) instructions. class UncoalescableRewriter : public Rewriter { unsigned NumDefs; ///< Number of defs in the bitcast. @@ -1101,7 +1102,7 @@ static Rewriter *getCopyRewriter(MachineInstr &MI, const TargetInstrInfo &TII) { } } -/// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find +/// Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find /// the new source to use for rewrite. If \p HandleMultipleSources is true and /// multiple sources for a given \p Def are found along the way, we found a /// PHI instructions that needs to be rewritten. @@ -1143,9 +1144,9 @@ getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, // Build the new PHI node and return its def register as the new source. MachineInstr &OrigPHI = const_cast<MachineInstr &>(*Res.getInst()); MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI); - DEBUG(dbgs() << "-- getNewSource\n"); - DEBUG(dbgs() << " Replacing: " << OrigPHI); - DEBUG(dbgs() << " With: " << NewPHI); + LLVM_DEBUG(dbgs() << "-- getNewSource\n"); + LLVM_DEBUG(dbgs() << " Replacing: " << OrigPHI); + LLVM_DEBUG(dbgs() << " With: " << NewPHI); const MachineOperand &MODef = NewPHI.getOperand(0); return RegSubRegPair(MODef.getReg(), MODef.getSubReg()); } @@ -1213,7 +1214,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) { return Changed; } -/// \brief Rewrite the source found through \p Def, by using the \p RewriteMap +/// Rewrite the source found through \p Def, by using the \p RewriteMap /// and create a new COPY instruction. More info about RewriteMap in /// PeepholeOptimizer::findNextSource. Right now this is only used to handle /// Uncoalescable copies, since they are copy like instructions that aren't @@ -1241,9 +1242,9 @@ PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike, NewCopy->getOperand(0).setIsUndef(); } - DEBUG(dbgs() << "-- RewriteSource\n"); - DEBUG(dbgs() << " Replacing: " << CopyLike); - DEBUG(dbgs() << " With: " << *NewCopy); + LLVM_DEBUG(dbgs() << "-- RewriteSource\n"); + LLVM_DEBUG(dbgs() << " Replacing: " << CopyLike); + LLVM_DEBUG(dbgs() << " With: " << *NewCopy); MRI->replaceRegWith(Def.Reg, NewVReg); MRI->clearKillFlags(NewVReg); @@ -1254,7 +1255,7 @@ PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike, return *NewCopy; } -/// \brief Optimize copy-like instructions to create +/// Optimize copy-like instructions to create /// register coalescer friendly instruction. 
/// The optimization tries to kill-off the \p MI by looking /// through a chain of copies to find a source that has a compatible @@ -1462,7 +1463,8 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( if (PrevCopy == NAPhysToVirtMIs.end()) { // We can't remove the copy: there was an intervening clobber of the // non-allocatable physical register after the copy to virtual. - DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << MI); + LLVM_DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " + << MI); return false; } @@ -1470,7 +1472,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( if (PrevDstReg == SrcReg) { // Remove the virt->phys copy: we saw the virtual register definition, and // the non-allocatable physical register's state hasn't changed since then. - DEBUG(dbgs() << "NAPhysCopy: erasing " << MI); + LLVM_DEBUG(dbgs() << "NAPhysCopy: erasing " << MI); ++NumNAPhysCopies; return true; } @@ -1479,7 +1481,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( // register get a copy of the non-allocatable physical register, and we only // track one such copy. Avoid getting confused by this new non-allocatable // physical register definition, and remove it from the tracked copies. - DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI); + LLVM_DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI); NAPhysToVirtMIs.erase(PrevCopy); return false; } @@ -1575,15 +1577,15 @@ bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) { if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) { // Commutes operands of instructions in RC if necessary so that the copy to // be generated from PHI can be coalesced. - DEBUG(dbgs() << "Optimize recurrence chain from " << PHI); + LLVM_DEBUG(dbgs() << "Optimize recurrence chain from " << PHI); for (auto &RI : RC) { - DEBUG(dbgs() << "\tInst: " << *(RI.getMI())); + LLVM_DEBUG(dbgs() << "\tInst: " << *(RI.getMI())); auto CP = RI.getCommutePair(); if (CP) { Changed = true; TII->commuteInstruction(*(RI.getMI()), false, (*CP).first, (*CP).second); - DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI())); + LLVM_DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI())); } } } @@ -1595,8 +1597,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; - DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); - DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n'); + LLVM_DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); + LLVM_DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n'); if (DisablePeephole) return false; @@ -1643,8 +1645,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { ++MII; LocalMIs.insert(MI); - // Skip debug values. They should not affect this peephole optimization. - if (MI->isDebugValue()) + // Skip debug instructions. They should not affect this peephole optimization. + if (MI->isDebugInstr()) continue; if (MI->isPosition()) @@ -1667,7 +1669,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (Def != NAPhysToVirtMIs.end()) { // A new definition of the non-allocatable physical register // invalidates previous copies. 
- DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI); + LLVM_DEBUG(dbgs() + << "NAPhysCopy: invalidating because of " << *MI); NAPhysToVirtMIs.erase(Def); } } @@ -1676,7 +1679,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { for (auto &RegMI : NAPhysToVirtMIs) { unsigned Def = RegMI.first; if (MachineOperand::clobbersPhysReg(RegMask, Def)) { - DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI); + LLVM_DEBUG(dbgs() + << "NAPhysCopy: invalidating because of " << *MI); NAPhysToVirtMIs.erase(Def); } } @@ -1692,7 +1696,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // don't know what's correct anymore. // // FIXME: handle explicit asm clobbers. - DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI); + LLVM_DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " + << *MI); NAPhysToVirtMIs.clear(); } @@ -1768,8 +1773,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) { // Update LocalMIs since we replaced MI with FoldMI and deleted // DefMI. - DEBUG(dbgs() << "Replacing: " << *MI); - DEBUG(dbgs() << " With: " << *FoldMI); + LLVM_DEBUG(dbgs() << "Replacing: " << *MI); + LLVM_DEBUG(dbgs() << " With: " << *FoldMI); LocalMIs.erase(MI); LocalMIs.erase(DefMI); LocalMIs.insert(FoldMI); @@ -1791,7 +1796,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // the load candidates. Note: We might be able to fold *into* this // instruction, so this needs to be after the folding logic. if (MI->isLoadFoldBarrier()) { - DEBUG(dbgs() << "Encountered load fold barrier on " << *MI); + LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI); FoldAsLoadDefCandidates.clear(); } } diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp index 5d86faafdd85..215da630caf4 100644 --- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -38,6 +38,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -242,11 +243,11 @@ void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb, /// Print the schedule before exiting the region. 
void SchedulePostRATDList::exitRegion() { - DEBUG({ - dbgs() << "*** Final schedule ***\n"; - dumpSchedule(); - dbgs() << '\n'; - }); + LLVM_DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); ScheduleDAGInstrs::exitRegion(); } @@ -308,7 +309,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { : TargetSubtargetInfo::ANTIDEP_NONE); } - DEBUG(dbgs() << "PostRAScheduler\n"); + LLVM_DEBUG(dbgs() << "PostRAScheduler\n"); SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode, CriticalPathRCs); @@ -412,13 +413,12 @@ void SchedulePostRATDList::schedule() { postprocessDAG(); - DEBUG(dbgs() << "********** List Scheduling **********\n"); - DEBUG( - for (const SUnit &SU : SUnits) { - SU.dumpAll(this); - dbgs() << '\n'; - } - ); + LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n"); + LLVM_DEBUG(for (const SUnit &SU + : SUnits) { + SU.dumpAll(this); + dbgs() << '\n'; + }); AvailableQueue.initNodes(SUnits); ListScheduleTopDown(); @@ -501,8 +501,8 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) { /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { - DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); - DEBUG(SU->dump(this)); + LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + LLVM_DEBUG(SU->dump(this)); Sequence.push_back(SU); assert(CurCycle >= SU->getDepth() && @@ -516,7 +516,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { /// emitNoop - Add a noop to the current instruction sequence. void SchedulePostRATDList::emitNoop(unsigned CurCycle) { - DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n'); + LLVM_DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n'); HazardRec->EmitNoop(); Sequence.push_back(nullptr); // NULL here means noop ++NumNoops; @@ -568,7 +568,8 @@ void SchedulePostRATDList::ListScheduleTopDown() { MinDepth = PendingQueue[i]->getDepth(); } - DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this)); + LLVM_DEBUG(dbgs() << "\n*** Examining Available\n"; + AvailableQueue.dump(this)); SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr; bool HasNoopHazards = false; @@ -604,7 +605,8 @@ void SchedulePostRATDList::ListScheduleTopDown() { // non-preferred node. if (NotPreferredSUnit) { if (!FoundSUnit) { - DEBUG(dbgs() << "*** Will schedule a non-preferred instruction...\n"); + LLVM_DEBUG( + dbgs() << "*** Will schedule a non-preferred instruction...\n"); FoundSUnit = NotPreferredSUnit; } else { AvailableQueue.push(NotPreferredSUnit); @@ -631,19 +633,20 @@ void SchedulePostRATDList::ListScheduleTopDown() { HazardRec->EmitInstruction(FoundSUnit); CycleHasInsts = true; if (HazardRec->atIssueLimit()) { - DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle << '\n'); + LLVM_DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle + << '\n'); HazardRec->AdvanceCycle(); ++CurCycle; CycleHasInsts = false; } } else { if (CycleHasInsts) { - DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n'); + LLVM_DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n'); HazardRec->AdvanceCycle(); } else if (!HasNoopHazards) { // Otherwise, we have a pipeline stall, but no other problem, // just advance the current cycle and try again. 
- DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n'); + LLVM_DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n'); HazardRec->AdvanceCycle(); ++NumStalls; } else { diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index 48b48c5f6499..7e9b4af12ee9 100644 --- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -44,7 +44,7 @@ public: void getAnalysisUsage(AnalysisUsage &au) const override; - bool runOnMachineFunction(MachineFunction &fn) override; + bool runOnMachineFunction(MachineFunction &MF) override; }; } // end anonymous namespace @@ -73,7 +73,7 @@ bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) { } void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { - DEBUG(dbgs() << "Processing " << *MI); + LLVM_DEBUG(dbgs() << "Processing " << *MI); unsigned Reg = MI->getOperand(0).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { @@ -84,7 +84,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { MachineInstr *UserMI = MO.getParent(); if (!canTurnIntoImplicitDef(UserMI)) continue; - DEBUG(dbgs() << "Converting to IMPLICIT_DEF: " << *UserMI); + LLVM_DEBUG(dbgs() << "Converting to IMPLICIT_DEF: " << *UserMI); UserMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); WorkList.insert(UserMI); } @@ -116,7 +116,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { // If we found the using MI, we can erase the IMPLICIT_DEF. if (Found) { - DEBUG(dbgs() << "Physreg user: " << *UserMI); + LLVM_DEBUG(dbgs() << "Physreg user: " << *UserMI); MI->eraseFromParent(); return; } @@ -125,15 +125,15 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { // Leave the physreg IMPLICIT_DEF, but trim any extra operands. for (unsigned i = MI->getNumOperands() - 1; i; --i) MI->RemoveOperand(i); - DEBUG(dbgs() << "Keeping physreg: " << *MI); + LLVM_DEBUG(dbgs() << "Keeping physreg: " << *MI); } /// processImplicitDefs - Process IMPLICIT_DEF instructions and turn them into /// <undef> operands. bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" - << "********** Function: " << MF.getName() << '\n'); + LLVM_DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" + << "********** Function: " << MF.getName() << '\n'); bool Changed = false; @@ -154,8 +154,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { if (WorkList.empty()) continue; - DEBUG(dbgs() << printMBBReference(*MFI) << " has " << WorkList.size() - << " implicit defs.\n"); + LLVM_DEBUG(dbgs() << printMBBReference(*MFI) << " has " << WorkList.size() + << " implicit defs.\n"); Changed = true; // Drain the WorkList to recursively process any new implicit defs. 
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index a8d8ad8ac7dc..fc62c8caf59e 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -38,7 +38,6 @@ #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" @@ -90,7 +89,7 @@ public: /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. - bool runOnMachineFunction(MachineFunction &Fn) override; + bool runOnMachineFunction(MachineFunction &MF) override; private: RegScavenger *RS; @@ -117,15 +116,15 @@ private: // Emit remarks. MachineOptimizationRemarkEmitter *ORE = nullptr; - void calculateCallFrameInfo(MachineFunction &Fn); - void calculateSaveRestoreBlocks(MachineFunction &Fn); + void calculateCallFrameInfo(MachineFunction &MF); + void calculateSaveRestoreBlocks(MachineFunction &MF); void spillCalleeSavedRegs(MachineFunction &MF); - void calculateFrameObjectOffsets(MachineFunction &Fn); - void replaceFrameIndices(MachineFunction &Fn); - void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + void calculateFrameObjectOffsets(MachineFunction &MF); + void replaceFrameIndices(MachineFunction &MF); + void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, int &SPAdj); - void insertPrologEpilogCode(MachineFunction &Fn); + void insertPrologEpilogCode(MachineFunction &MF); }; } // end anonymous namespace @@ -143,7 +142,6 @@ INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(StackProtector) INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) INITIALIZE_PASS_END(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion & Frame Finalization", false, @@ -160,7 +158,6 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreserved<MachineLoopInfo>(); AU.addPreserved<MachineDominatorTree>(); - AU.addRequired<StackProtector>(); AU.addRequired<MachineOptimizationRemarkEmitterPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -170,36 +167,36 @@ using StackObjSet = SmallSetVector<int, 8>; /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. -bool PEI::runOnMachineFunction(MachineFunction &Fn) { - const Function &F = Fn.getFunction(); - const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); - const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); +bool PEI::runOnMachineFunction(MachineFunction &MF) { + const Function &F = MF.getFunction(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr; - FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); + RS = TRI->requiresRegisterScavenging(MF) ? 
new RegScavenger() : nullptr; + FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF); FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) || - TRI->requiresFrameIndexReplacementScavenging(Fn); + TRI->requiresFrameIndexReplacementScavenging(MF); ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); // Calculate the MaxCallFrameSize and AdjustsStack variables for the // function's frame information. Also eliminates call frame pseudo // instructions. - calculateCallFrameInfo(Fn); + calculateCallFrameInfo(MF); // Determine placement of CSR spill/restore code and prolog/epilog code: // place all spills in the entry block, all restores in return blocks. - calculateSaveRestoreBlocks(Fn); + calculateSaveRestoreBlocks(MF); // Handle CSR spilling and restoring, for targets that need it. - if (Fn.getTarget().usesPhysRegsForPEI()) - spillCalleeSavedRegs(Fn); + if (MF.getTarget().usesPhysRegsForPEI()) + spillCalleeSavedRegs(MF); // Allow the target machine to make final modifications to the function // before the frame layout is finalized. - TFI->processFunctionBeforeFrameFinalized(Fn, RS); + TFI->processFunctionBeforeFrameFinalized(MF, RS); // Calculate actual frame offsets for all abstract stack objects... - calculateFrameObjectOffsets(Fn); + calculateFrameObjectOffsets(MF); // Add prolog and epilog code to the function. This function is required // to align the stack frame as necessary for any stack variables or @@ -207,26 +204,32 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // must be called before this function in order to set the AdjustsStack // and MaxCallFrameSize variables. if (!F.hasFnAttribute(Attribute::Naked)) - insertPrologEpilogCode(Fn); + insertPrologEpilogCode(MF); // Replace all MO_FrameIndex operands with physical register references // and actual offsets. // - replaceFrameIndices(Fn); + replaceFrameIndices(MF); // If register scavenging is needed, as we've enabled doing it as a // post-pass, scavenge the virtual registers that frame index elimination // inserted. - if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) - scavengeFrameVirtualRegs(Fn, *RS); + if (TRI->requiresRegisterScavenging(MF) && FrameIndexVirtualScavenging) + scavengeFrameVirtualRegs(MF, *RS); // Warn on stack size when we exceeds the given limit. - MachineFrameInfo &MFI = Fn.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); uint64_t StackSize = MFI.getStackSize(); if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) { DiagnosticInfoStackSize DiagStackSize(F, StackSize); F.getContext().diagnose(DiagStackSize); } + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize", + MF.getFunction().getSubprogram(), + &MF.front()) + << ore::NV("NumStackBytes", StackSize) << " stack bytes in function"; + }); delete RS; SaveBlocks.clear(); @@ -239,10 +242,10 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { /// Calculate the MaxCallFrameSize and AdjustsStack /// variables for the function's frame information and eliminate call frame /// pseudo instructions. 
-void PEI::calculateCallFrameInfo(MachineFunction &Fn) { - const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); - const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); - MachineFrameInfo &MFI = Fn.getFrameInfo(); +void PEI::calculateCallFrameInfo(MachineFunction &MF) { + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned MaxCallFrameSize = 0; bool AdjustsStack = MFI.adjustsStack(); @@ -257,7 +260,7 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) { return; std::vector<MachineBasicBlock::iterator> FrameSDOps; - for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) if (TII.isFrameInstr(*I)) { unsigned Size = TII.getFrameSize(*I); @@ -285,15 +288,15 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) { // the target doesn't indicate otherwise, remove the call frame pseudos // here. The sub/add sp instruction pairs are still inserted, but we don't // need to track the SP adjustment for frame index elimination. - if (TFI->canSimplifyCallFramePseudos(Fn)) - TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); + if (TFI->canSimplifyCallFramePseudos(MF)) + TFI->eliminateCallFramePseudoInstr(MF, *I->getParent(), I); } } /// Compute the sets of entry and return blocks for saving and restoring /// callee-saved registers, and placing prolog and epilog code. -void PEI::calculateSaveRestoreBlocks(MachineFunction &Fn) { - const MachineFrameInfo &MFI = Fn.getFrameInfo(); +void PEI::calculateSaveRestoreBlocks(MachineFunction &MF) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); // Even when we do not change any CSR, we still want to insert the // prologue and epilogue of the function. @@ -313,8 +316,8 @@ void PEI::calculateSaveRestoreBlocks(MachineFunction &Fn) { } // Save refs to entry and return blocks. - SaveBlocks.push_back(&Fn.front()); - for (MachineBasicBlock &MBB : Fn) { + SaveBlocks.push_back(&MF.front()); + for (MachineBasicBlock &MBB : MF) { if (MBB.isEHFuncletEntry()) SaveBlocks.push_back(&MBB); if (MBB.isReturnBlock()) @@ -457,10 +460,10 @@ static void updateLiveness(MachineFunction &MF) { /// Insert restore code for the callee-saved registers used in the function. static void insertCSRSaves(MachineBasicBlock &SaveBlock, ArrayRef<CalleeSavedInfo> CSI) { - MachineFunction &Fn = *SaveBlock.getParent(); - const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); - const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); - const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); + MachineFunction &MF = *SaveBlock.getParent(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); MachineBasicBlock::iterator I = SaveBlock.begin(); if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { @@ -477,10 +480,10 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock, /// Insert restore code for the callee-saved registers used in the function. 
static void insertCSRRestores(MachineBasicBlock &RestoreBlock, std::vector<CalleeSavedInfo> &CSI) { - MachineFunction &Fn = *RestoreBlock.getParent(); - const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); - const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); - const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); + MachineFunction &MF = *RestoreBlock.getParent(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); // Restore all registers immediately before the return and any // terminators that precede it. @@ -499,27 +502,27 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock, } } -void PEI::spillCalleeSavedRegs(MachineFunction &Fn) { +void PEI::spillCalleeSavedRegs(MachineFunction &MF) { // We can't list this requirement in getRequiredProperties because some // targets (WebAssembly) use virtual registers past this point, and the pass // pipeline is set up without giving the passes a chance to look at the // TargetMachine. // FIXME: Find a way to express this in getRequiredProperties. - assert(Fn.getProperties().hasProperty( + assert(MF.getProperties().hasProperty( MachineFunctionProperties::Property::NoVRegs)); - const Function &F = Fn.getFunction(); - const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); - MachineFrameInfo &MFI = Fn.getFrameInfo(); + const Function &F = MF.getFunction(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + MachineFrameInfo &MFI = MF.getFrameInfo(); MinCSFrameIndex = std::numeric_limits<unsigned>::max(); MaxCSFrameIndex = 0; // Determine which of the registers in the callee save list should be saved. BitVector SavedRegs; - TFI->determineCalleeSaves(Fn, SavedRegs, RS); + TFI->determineCalleeSaves(MF, SavedRegs, RS); // Assign stack slots for any callee-saved registers that must be spilled. - assignCalleeSavedSpillSlots(Fn, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex); + assignCalleeSavedSpillSlots(MF, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex); // Add the code to save and restore the callee saved registers. if (!F.hasFnAttribute(Attribute::Naked)) { @@ -531,7 +534,7 @@ void PEI::spillCalleeSavedRegs(MachineFunction &Fn) { insertCSRSaves(*SaveBlock, CSI); // Update the live-in information of all the blocks up to the save // point. 
- updateLiveness(Fn); + updateLiveness(MF); } for (MachineBasicBlock *RestoreBlock : RestoreBlocks) insertCSRRestores(*RestoreBlock, CSI); @@ -558,10 +561,12 @@ AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, Offset = alignTo(Offset, Align, Skew); if (StackGrowsDown) { - DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n"); + LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset + << "]\n"); MFI.setObjectOffset(FrameIdx, -Offset); // Set the computed offset } else { - DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n"); + LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset + << "]\n"); MFI.setObjectOffset(FrameIdx, Offset); Offset += MFI.getObjectSize(FrameIdx); } @@ -654,12 +659,12 @@ static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx, if (StackGrowsDown) { int ObjStart = -(FreeStart + ObjSize); - DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << ObjStart - << "]\n"); + LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" + << ObjStart << "]\n"); MFI.setObjectOffset(FrameIdx, ObjStart); } else { - DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << FreeStart - << "]\n"); + LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" + << FreeStart << "]\n"); MFI.setObjectOffset(FrameIdx, FreeStart); } @@ -685,15 +690,14 @@ AssignProtectedObjSet(const StackObjSet &UnassignedObjs, /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the /// abstract stack objects. -void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { - const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); - StackProtector *SP = &getAnalysis<StackProtector>(); +void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { + const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; // Loop over all of the stack objects, assigning sequential addresses... - MachineFrameInfo &MFI = Fn.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction @@ -706,7 +710,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { int64_t Offset = LocalAreaOffset; // Skew to be applied to alignment. - unsigned Skew = TFI.getStackAlignmentSkew(Fn); + unsigned Skew = TFI.getStackAlignmentSkew(MF); // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. 
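The offset bookkeeping in AdjustStackOffset and calculateFrameObjectOffsets above rounds the running offset up to each object's alignment (with an optional skew) before recording it, negating it when the stack grows down. A small standalone sketch of that rounding, assuming alignTo(Value, Align, Skew) has its usual round-up-with-skew meaning:

#include <cassert>
#include <cstdint>

// Round Value up so that (result - Skew) is a multiple of Align; this mirrors
// the alignTo(Offset, Align, Skew) calls visible in the hunks above.
static uint64_t alignToSkewed(uint64_t Value, uint64_t Align, uint64_t Skew) {
  Skew %= Align;
  return (Value + Align - 1 - Skew) / Align * Align + Skew;
}

int main() {
  // Example for a downward-growing stack: an 8-byte object with 16-byte
  // alignment placed after 20 bytes of already-allocated objects gives a
  // running offset of alignToSkewed(20 + 8, 16, 0) == 32, which is recorded
  // as SP[-32], matching the "alloc FI(n) at SP[-Offset]" debug traces.
  assert(alignToSkewed(20 + 8, 16, 0) == 32);
  return 0;
}
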
@@ -739,7 +743,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Adjust to alignment boundary Offset = alignTo(Offset, Align, Skew); - DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n"); + LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n"); MFI.setObjectOffset(i, -Offset); // Set the computed offset } } else if (MaxCSFrameIndex >= MinCSFrameIndex) { @@ -752,7 +756,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Adjust to alignment boundary Offset = alignTo(Offset, Align, Skew); - DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n"); + LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n"); MFI.setObjectOffset(i, Offset); Offset += MFI.getObjectSize(i); } @@ -766,11 +770,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure the special register scavenging spill slot is closest to the // incoming stack pointer if a frame pointer is required and is closer // to the incoming rather than the final stack pointer. - const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo(); - bool EarlyScavengingSlots = (TFI.hasFP(Fn) && + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + bool EarlyScavengingSlots = (TFI.hasFP(MF) && TFI.isFPCloseToIncomingSP() && - RegInfo->useFPForScavengingIndex(Fn) && - !RegInfo->needsStackRealignment(Fn)); + RegInfo->useFPForScavengingIndex(MF) && + !RegInfo->needsStackRealignment(MF)); if (RS && EarlyScavengingSlots) { SmallVector<int, 2> SFIs; RS->getScavengingFrameIndices(SFIs); @@ -789,14 +793,14 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Adjust to alignment boundary. Offset = alignTo(Offset, Align, Skew); - DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); + LLVM_DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); // Resolve offsets for objects in the local block. for (unsigned i = 0, e = MFI.getLocalFrameObjectCount(); i != e; ++i) { std::pair<int, int64_t> Entry = MFI.getLocalFrameObjectMap(i); int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second; - DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << - FIOffset << "]\n"); + LLVM_DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << FIOffset + << "]\n"); MFI.setObjectOffset(Entry.first, FIOffset); } // Allocate the local block @@ -807,7 +811,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Retrieve the Exception Handler registration node. 
int EHRegNodeFrameIndex = std::numeric_limits<int>::max(); - if (const WinEHFuncInfo *FuncInfo = Fn.getWinEHFuncInfo()) + if (const WinEHFuncInfo *FuncInfo = MF.getWinEHFuncInfo()) EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex; // Make sure that the stack protector comes before the local variables on the @@ -836,16 +840,16 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { EHRegNodeFrameIndex == (int)i) continue; - switch (SP->getSSPLayout(MFI.getObjectAllocation(i))) { - case StackProtector::SSPLK_None: + switch (MFI.getObjectSSPLayout(i)) { + case MachineFrameInfo::SSPLK_None: continue; - case StackProtector::SSPLK_SmallArray: + case MachineFrameInfo::SSPLK_SmallArray: SmallArrayObjs.insert(i); continue; - case StackProtector::SSPLK_AddrOf: + case MachineFrameInfo::SSPLK_AddrOf: AddrOfObjs.insert(i); continue; - case StackProtector::SSPLK_LargeArray: + case MachineFrameInfo::SSPLK_LargeArray: LargeArrayObjs.insert(i); continue; } @@ -889,9 +893,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { MaxAlign, Skew); // Give the targets a chance to order the objects the way they like it. - if (Fn.getTarget().getOptLevel() != CodeGenOpt::None && - Fn.getTarget().Options.StackSymbolOrdering) - TFI.orderFrameObjects(Fn, ObjectsToAllocate); + if (MF.getTarget().getOptLevel() != CodeGenOpt::None && + MF.getTarget().Options.StackSymbolOrdering) + TFI.orderFrameObjects(MF, ObjectsToAllocate); // Keep track of which bytes in the fixed and callee-save range are used so we // can use the holes when allocating later stack objects. Only do this if @@ -899,8 +903,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // optimizing. BitVector StackBytesFree; if (!ObjectsToAllocate.empty() && - Fn.getTarget().getOptLevel() != CodeGenOpt::None && - MFI.getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn)) + MF.getTarget().getOptLevel() != CodeGenOpt::None && + MFI.getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(MF)) computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex, FixedCSEnd, StackBytesFree); @@ -924,7 +928,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. - if (MFI.adjustsStack() && TFI.hasReservedCallFrame(Fn)) + if (MFI.adjustsStack() && TFI.hasReservedCallFrame(MF)) Offset += MFI.getMaxCallFrameSize(); // Round up the size to a multiple of the alignment. If the function has @@ -934,7 +938,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // value. 
unsigned StackAlign; if (MFI.adjustsStack() || MFI.hasVarSizedObjects() || - (RegInfo->needsStackRealignment(Fn) && MFI.getObjectIndexEnd() != 0)) + (RegInfo->needsStackRealignment(MF) && MFI.getObjectIndexEnd() != 0)) StackAlign = TFI.getStackAlignment(); else StackAlign = TFI.getTransientStackAlignment(); @@ -949,68 +953,61 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { int64_t StackSize = Offset - LocalAreaOffset; MFI.setStackSize(StackSize); NumBytesStackSpace += StackSize; - - ORE->emit([&]() { - return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize", - Fn.getFunction().getSubprogram(), - &Fn.front()) - << ore::NV("NumStackBytes", StackSize) << " stack bytes in function"; - }); } /// insertPrologEpilogCode - Scan the function for modified callee saved /// registers, insert spill code for these callee saved registers, then add /// prolog and epilog code to the function. -void PEI::insertPrologEpilogCode(MachineFunction &Fn) { - const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); +void PEI::insertPrologEpilogCode(MachineFunction &MF) { + const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); // Add prologue to the function... for (MachineBasicBlock *SaveBlock : SaveBlocks) - TFI.emitPrologue(Fn, *SaveBlock); + TFI.emitPrologue(MF, *SaveBlock); // Add epilogue to restore the callee-save registers in each exiting block. for (MachineBasicBlock *RestoreBlock : RestoreBlocks) - TFI.emitEpilogue(Fn, *RestoreBlock); + TFI.emitEpilogue(MF, *RestoreBlock); for (MachineBasicBlock *SaveBlock : SaveBlocks) - TFI.inlineStackProbe(Fn, *SaveBlock); + TFI.inlineStackProbe(MF, *SaveBlock); // Emit additional code that is required to support segmented stacks, if // we've been asked for it. This, when linked with a runtime with support // for segmented stacks (libgcc is one), will result in allocating stack // space in small chunks instead of one large contiguous block. - if (Fn.shouldSplitStack()) { + if (MF.shouldSplitStack()) { for (MachineBasicBlock *SaveBlock : SaveBlocks) - TFI.adjustForSegmentedStacks(Fn, *SaveBlock); + TFI.adjustForSegmentedStacks(MF, *SaveBlock); // Record that there are split-stack functions, so we will emit a // special section to tell the linker. - Fn.getMMI().setHasSplitStack(true); + MF.getMMI().setHasSplitStack(true); } else - Fn.getMMI().setHasNosplitStack(true); + MF.getMMI().setHasNosplitStack(true); // Emit additional code that is required to explicitly handle the stack in // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The // approach is rather similar to that of Segmented Stacks, but it uses a // different conditional check and another BIF for allocating more stack // space. - if (Fn.getFunction().getCallingConv() == CallingConv::HiPE) + if (MF.getFunction().getCallingConv() == CallingConv::HiPE) for (MachineBasicBlock *SaveBlock : SaveBlocks) - TFI.adjustForHiPEPrologue(Fn, *SaveBlock); + TFI.adjustForHiPEPrologue(MF, *SaveBlock); } /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical /// register references and actual offsets. -void PEI::replaceFrameIndices(MachineFunction &Fn) { - const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); - if (!TFI.needsFrameIndexResolution(Fn)) return; +void PEI::replaceFrameIndices(MachineFunction &MF) { + const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); + if (!TFI.needsFrameIndexResolution(MF)) return; // Store SPAdj at exit of a basic block. 
SmallVector<int, 8> SPState; - SPState.resize(Fn.getNumBlockIDs()); + SPState.resize(MF.getNumBlockIDs()); df_iterator_default_set<MachineBasicBlock*> Reachable; // Iterate over the reachable blocks in DFS order. - for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable); + for (auto DFI = df_ext_begin(&MF, Reachable), DFE = df_ext_end(&MF, Reachable); DFI != DFE; ++DFI) { int SPAdj = 0; // Check the exit state of the DFS stack predecessor. @@ -1021,27 +1018,27 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { SPAdj = SPState[StackPred->getNumber()]; } MachineBasicBlock *BB = *DFI; - replaceFrameIndices(BB, Fn, SPAdj); + replaceFrameIndices(BB, MF, SPAdj); SPState[BB->getNumber()] = SPAdj; } // Handle the unreachable blocks. - for (auto &BB : Fn) { + for (auto &BB : MF) { if (Reachable.count(&BB)) // Already handled in DFS traversal. continue; int SPAdj = 0; - replaceFrameIndices(&BB, Fn, SPAdj); + replaceFrameIndices(&BB, MF, SPAdj); } } -void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, +void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, int &SPAdj) { - assert(Fn.getSubtarget().getRegisterInfo() && + assert(MF.getSubtarget().getRegisterInfo() && "getRegisterInfo() must be implemented!"); - const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); - const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo(); - const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); if (RS && FrameIndexEliminationScavenging) RS->enterBasicBlock(*BB); @@ -1052,7 +1049,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, if (TII.isFrameInstr(*I)) { InsideCallSequence = TII.isFrameSetup(*I); SPAdj += TII.getSPAdjust(*I); - I = TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); + I = TFI->eliminateCallFramePseudoInstr(MF, *BB, I); continue; } @@ -1071,8 +1068,9 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, "operand of a DBG_VALUE machine instruction"); unsigned Reg; int64_t Offset = - TFI->getFrameIndexReference(Fn, MI.getOperand(0).getIndex(), Reg); + TFI->getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg); MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/); + MI.getOperand(0).setIsDebug(); auto *DIExpr = DIExpression::prepend(MI.getDebugExpression(), DIExpression::NoDeref, Offset); MI.getOperand(3).setMetadata(DIExpr); @@ -1091,7 +1089,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, unsigned Reg; MachineOperand &Offset = MI.getOperand(i + 1); int refOffset = TFI->getFrameIndexReferencePreferSP( - Fn, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false); + MF, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false); Offset.setImm(Offset.getImm() + refOffset); MI.getOperand(i).ChangeToRegister(Reg, false /*isDef*/); continue; diff --git a/contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp new file mode 100644 index 000000000000..050fef5d25ed --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -0,0 +1,195 @@ +//===---- ReachingDefAnalysis.cpp - Reaching Def Analysis ---*- C++ -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ReachingDefAnalysis.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "reaching-deps-analysis" + +char ReachingDefAnalysis::ID = 0; +INITIALIZE_PASS(ReachingDefAnalysis, DEBUG_TYPE, "ReachingDefAnalysis", false, + true) + +void ReachingDefAnalysis::enterBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + + MachineBasicBlock *MBB = TraversedMBB.MBB; + unsigned MBBNumber = MBB->getNumber(); + assert(MBBNumber < MBBReachingDefs.size() && + "Unexpected basic block number."); + MBBReachingDefs[MBBNumber].resize(NumRegUnits); + + // Reset instruction counter in each basic block. + CurInstr = 0; + + // Set up LiveRegs to represent registers entering MBB. + // Default values are 'nothing happened a long time ago'. + if (LiveRegs.empty()) + LiveRegs.assign(NumRegUnits, ReachingDefDefaultVal); + + // This is the entry block. + if (MBB->pred_empty()) { + for (const auto &LI : MBB->liveins()) { + for (MCRegUnitIterator Unit(LI.PhysReg, TRI); Unit.isValid(); ++Unit) { + // Treat function live-ins as if they were defined just before the first + // instruction. Usually, function arguments are set up immediately + // before the call. + LiveRegs[*Unit] = -1; + MBBReachingDefs[MBBNumber][*Unit].push_back(LiveRegs[*Unit]); + } + } + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n"); + return; + } + + // Try to coalesce live-out registers from predecessors. + for (MachineBasicBlock *pred : MBB->predecessors()) { + assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() && + "Should have pre-allocated MBBInfos for all MBBs"); + const LiveRegsDefInfo &Incoming = MBBOutRegsInfos[pred->getNumber()]; + // Incoming is null if this is a backedge from a BB + // we haven't processed yet + if (Incoming.empty()) + continue; + + for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit) { + // Use the most recent predecessor def for each register. + LiveRegs[Unit] = std::max(LiveRegs[Unit], Incoming[Unit]); + if ((LiveRegs[Unit] != ReachingDefDefaultVal)) + MBBReachingDefs[MBBNumber][Unit].push_back(LiveRegs[Unit]); + } + } + + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) + << (!TraversedMBB.IsDone ? ": incomplete\n" + : ": all preds known\n")); +} + +void ReachingDefAnalysis::leaveBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + assert(!LiveRegs.empty() && "Must enter basic block first."); + unsigned MBBNumber = TraversedMBB.MBB->getNumber(); + assert(MBBNumber < MBBOutRegsInfos.size() && + "Unexpected basic block number."); + // Save register clearances at end of MBB - used by enterBasicBlock(). + MBBOutRegsInfos[MBBNumber] = LiveRegs; + + // While processing the basic block, we kept `Def` relative to the start + // of the basic block for convenience. However, future use of this information + // only cares about the clearance from the end of the block, so adjust + // everything to be relative to the end of the basic block. 
+ for (int &OutLiveReg : MBBOutRegsInfos[MBBNumber]) + OutLiveReg -= CurInstr; + LiveRegs.clear(); +} + +void ReachingDefAnalysis::processDefs(MachineInstr *MI) { + assert(!MI->isDebugInstr() && "Won't process debug instructions"); + + unsigned MBBNumber = MI->getParent()->getNumber(); + assert(MBBNumber < MBBReachingDefs.size() && + "Unexpected basic block number."); + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, + e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isUse()) + continue; + for (MCRegUnitIterator Unit(MO.getReg(), TRI); Unit.isValid(); ++Unit) { + // This instruction explicitly defines the current reg unit. + LLVM_DEBUG(dbgs() << printReg(MO.getReg(), TRI) << ":\t" << CurInstr + << '\t' << *MI); + + // How many instructions since this reg unit was last written? + LiveRegs[*Unit] = CurInstr; + MBBReachingDefs[MBBNumber][*Unit].push_back(CurInstr); + } + } + InstIds[MI] = CurInstr; + ++CurInstr; +} + +void ReachingDefAnalysis::processBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + enterBasicBlock(TraversedMBB); + for (MachineInstr &MI : *TraversedMBB.MBB) { + if (!MI.isDebugInstr()) + processDefs(&MI); + } + leaveBasicBlock(TraversedMBB); +} + +bool ReachingDefAnalysis::runOnMachineFunction(MachineFunction &mf) { + if (skipFunction(mf.getFunction())) + return false; + MF = &mf; + TRI = MF->getSubtarget().getRegisterInfo(); + + LiveRegs.clear(); + NumRegUnits = TRI->getNumRegUnits(); + + MBBReachingDefs.resize(mf.getNumBlockIDs()); + + LLVM_DEBUG(dbgs() << "********** REACHING DEFINITION ANALYSIS **********\n"); + + // Initialize the MBBOutRegsInfos + MBBOutRegsInfos.resize(mf.getNumBlockIDs()); + + // Traverse the basic blocks. + LoopTraversal Traversal; + LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf); + for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) { + processBasicBlock(TraversedMBB); + } + + // Sorting all reaching defs found for a ceartin reg unit in a given BB. + for (MBBDefsInfo &MBBDefs : MBBReachingDefs) { + for (MBBRegUnitDefs &RegUnitDefs : MBBDefs) + llvm::sort(RegUnitDefs.begin(), RegUnitDefs.end()); + } + + return false; +} + +void ReachingDefAnalysis::releaseMemory() { + // Clear the internal vectors. 
+ MBBOutRegsInfos.clear(); + MBBReachingDefs.clear(); + InstIds.clear(); +} + +int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) { + assert(InstIds.count(MI) && "Unexpected machine instuction."); + int InstId = InstIds[MI]; + int DefRes = ReachingDefDefaultVal; + unsigned MBBNumber = MI->getParent()->getNumber(); + assert(MBBNumber < MBBReachingDefs.size() && + "Unexpected basic block number."); + int LatestDef = ReachingDefDefaultVal; + for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) { + for (int Def : MBBReachingDefs[MBBNumber][*Unit]) { + if (Def >= InstId) + break; + DefRes = Def; + } + LatestDef = std::max(LatestDef, DefRes); + } + return LatestDef; +} + +int ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) { + assert(InstIds.count(MI) && "Unexpected machine instuction."); + return InstIds[MI] - getReachingDef(MI, PhysReg); +} diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp index 74c1592634aa..bc28a054c680 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp @@ -91,7 +91,7 @@ void RegAllocBase::allocatePhysRegs() { // Unused registers can appear when the spiller coalesces snippets. if (MRI->reg_nodbg_empty(VirtReg->reg)) { - DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); + LLVM_DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); aboutToRemoveInterval(*VirtReg); LIS->removeInterval(VirtReg->reg); continue; @@ -103,9 +103,9 @@ void RegAllocBase::allocatePhysRegs() { // selectOrSplit requests the allocator to return an available physical // register if possible and populate a list of new live intervals that // result from splitting. - DEBUG(dbgs() << "\nselectOrSplit " - << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg)) - << ':' << *VirtReg << " w=" << VirtReg->weight << '\n'); + LLVM_DEBUG(dbgs() << "\nselectOrSplit " + << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg)) + << ':' << *VirtReg << " w=" << VirtReg->weight << '\n'); using VirtRegVec = SmallVector<unsigned, 4>; @@ -145,12 +145,12 @@ void RegAllocBase::allocatePhysRegs() { assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { assert(SplitVirtReg->empty() && "Non-empty but used interval"); - DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); + LLVM_DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); aboutToRemoveInterval(*SplitVirtReg); LIS->removeInterval(SplitVirtReg->reg); continue; } - DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); + LLVM_DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && "expect split value in virtual register"); enqueue(SplitVirtReg); diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp index 1125d2c62bef..daeff3fc3963 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -219,8 +219,8 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, Intfs.push_back(Intf); } } - DEBUG(dbgs() << "spilling " << printReg(PhysReg, TRI) - << " interferences with " << VirtReg << "\n"); + LLVM_DEBUG(dbgs() << "spilling " << printReg(PhysReg, TRI) + << " interferences with " << VirtReg << "\n"); assert(!Intfs.empty() && "expected interference"); // Spill each interfering vreg allocated to PhysReg or an alias. 
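The ReachingDefAnalysis file added above exposes getReachingDef and getClearance, where the clearance of a physreg at an instruction is the number of instructions since the nearest reaching def. A hedged sketch of how a client pass might consume that (the helper name and Threshold parameter are illustrative, not part of the patch):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/ReachingDefAnalysis.h"
#include "llvm/MC/MCRegisterInfo.h"

// Hypothetical consumer: treat PhysReg as "recently written" at MI when fewer
// than Threshold instructions separate MI from its nearest reaching def,
// which is exactly the distance getClearance() derives from getReachingDef().
static bool physRegRecentlyWritten(llvm::ReachingDefAnalysis &RDA,
                                   llvm::MachineInstr &MI,
                                   llvm::MCPhysReg PhysReg, int Threshold) {
  return RDA.getClearance(&MI, PhysReg) < Threshold;
}
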
@@ -292,7 +292,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, } // No other spill candidates were found, so spill the current VirtReg. - DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); + LLVM_DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); if (!VirtReg.isSpillable()) return ~0u; LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats); @@ -304,9 +304,8 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, } bool RABasic::runOnMachineFunction(MachineFunction &mf) { - DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n" - << "********** Function: " - << mf.getName() << '\n'); + LLVM_DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n" + << "********** Function: " << mf.getName() << '\n'); MF = &mf; RegAllocBase::init(getAnalysis<VirtRegMap>(), @@ -323,7 +322,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { postOptimization(); // Diagnostic output before rewriting - DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n"); + LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n"); releaseMemory(); return true; diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp index 17d9492d942e..7b57c6cbcdb8 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -179,7 +179,7 @@ namespace { } private: - bool runOnMachineFunction(MachineFunction &Fn) override; + bool runOnMachineFunction(MachineFunction &MF) override; void allocateBasicBlock(MachineBasicBlock &MBB); void handleThroughOperands(MachineInstr &MI, SmallVectorImpl<unsigned> &VirtDead); @@ -206,7 +206,7 @@ namespace { return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); } - LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, MCPhysReg PhysReg); + LiveRegMap::iterator assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg); LiveRegMap::iterator allocVirtReg(MachineInstr &MI, LiveRegMap::iterator, unsigned Hint); LiveRegMap::iterator defineVirtReg(MachineInstr &MI, unsigned OpNum, @@ -322,11 +322,11 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, // instruction, not on the spill. 
bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI; LR.Dirty = false; - DEBUG(dbgs() << "Spilling " << printReg(LRI->VirtReg, TRI) - << " in " << printReg(LR.PhysReg, TRI)); + LLVM_DEBUG(dbgs() << "Spilling " << printReg(LRI->VirtReg, TRI) << " in " + << printReg(LR.PhysReg, TRI)); const TargetRegisterClass &RC = *MRI->getRegClass(LRI->VirtReg); int FI = getStackSpaceFor(LRI->VirtReg, RC); - DEBUG(dbgs() << " to stack slot #" << FI << "\n"); + LLVM_DEBUG(dbgs() << " to stack slot #" << FI << "\n"); TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, &RC, TRI); ++NumStores; // Update statistics @@ -339,7 +339,9 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, MachineInstr *NewDV = buildDbgValueForSpill(*MBB, MI, *DBG, FI); assert(NewDV->getParent() == MBB && "dangling parent pointer"); (void)NewDV; - DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); + LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:" + << "\n" + << *NewDV); } // Now this register is spilled there is should not be any DBG_VALUE // pointing to this register because they are all pointing to spilled value @@ -470,13 +472,14 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, } } -/// \brief Return the cost of spilling clearing out PhysReg and aliases so it is +/// Return the cost of spilling clearing out PhysReg and aliases so it is /// free for allocation. Returns 0 when PhysReg is free or disabled with all /// aliases disabled - it can be allocated directly. /// \returns spillImpossible when PhysReg or an alias can't be spilled. unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { if (isRegUsedInInstr(PhysReg)) { - DEBUG(dbgs() << printReg(PhysReg, TRI) << " is already used in instr.\n"); + LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) + << " is already used in instr.\n"); return spillImpossible; } switch (unsigned VirtReg = PhysRegState[PhysReg]) { @@ -485,8 +488,8 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { case regFree: return 0; case regReserved: - DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " - << printReg(PhysReg, TRI) << " is reserved already.\n"); + LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " + << printReg(PhysReg, TRI) << " is reserved already.\n"); return spillImpossible; default: { LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); @@ -496,7 +499,7 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { } // This is a disabled register, add up cost of aliases. - DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n"); + LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n"); unsigned Cost = 0; for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { MCPhysReg Alias = *AI; @@ -519,12 +522,12 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { return Cost; } -/// \brief This method updates local state so that we know that PhysReg is the +/// This method updates local state so that we know that PhysReg is the /// proper container for VirtReg now. The physical register must not be used /// for anything else when this is called. 
void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) { - DEBUG(dbgs() << "Assigning " << printReg(LR.VirtReg, TRI) << " to " - << printReg(PhysReg, TRI) << "\n"); + LLVM_DEBUG(dbgs() << "Assigning " << printReg(LR.VirtReg, TRI) << " to " + << printReg(PhysReg, TRI) << "\n"); PhysRegState[PhysReg] = LR.VirtReg; assert(!LR.PhysReg && "Already assigned a physreg"); LR.PhysReg = PhysReg; @@ -570,16 +573,16 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI, } } - DEBUG(dbgs() << "Allocating " << printReg(VirtReg) << " from " - << TRI->getRegClassName(&RC) << "\n"); + LLVM_DEBUG(dbgs() << "Allocating " << printReg(VirtReg) << " from " + << TRI->getRegClassName(&RC) << "\n"); unsigned BestReg = 0; unsigned BestCost = spillImpossible; for (MCPhysReg PhysReg : AO) { unsigned Cost = calcSpillCost(PhysReg); - DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << "\n"); - DEBUG(dbgs() << "\tCost: " << Cost << "\n"); - DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); + LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << "\n"); + LLVM_DEBUG(dbgs() << "\tCost: " << Cost << "\n"); + LLVM_DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); // Cost is 0 when all aliases are already disabled. if (Cost == 0) { assignVirtToPhysReg(*LRI, PhysReg); @@ -654,22 +657,22 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI, LRI = allocVirtReg(MI, LRI, Hint); const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); - DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into " - << printReg(LRI->PhysReg, TRI) << "\n"); + LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into " + << printReg(LRI->PhysReg, TRI) << "\n"); TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, &RC, TRI); ++NumLoads; } else if (LRI->Dirty) { if (isLastUseOfLocalReg(MO)) { - DEBUG(dbgs() << "Killing last use: " << MO << "\n"); + LLVM_DEBUG(dbgs() << "Killing last use: " << MO << "\n"); if (MO.isUse()) MO.setIsKill(); else MO.setIsDead(); } else if (MO.isKill()) { - DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n"); + LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n"); MO.setIsKill(false); } else if (MO.isDead()) { - DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n"); + LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n"); MO.setIsDead(false); } } else if (MO.isKill()) { @@ -677,10 +680,10 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI, // register would be killed immediately, and there might be a second use: // %foo = OR killed %x, %x // This would cause a second reload of %x into a different register. - DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n"); + LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n"); MO.setIsKill(false); } else if (MO.isDead()) { - DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n"); + LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n"); MO.setIsDead(false); } assert(LRI->PhysReg && "Register not assigned"); @@ -699,13 +702,13 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, unsigned OpNum, bool Dead = MO.isDead(); if (!MO.getSubReg()) { MO.setReg(PhysReg); - MO.setIsRenamableIfNoExtraRegAllocReq(); + MO.setIsRenamable(true); return MO.isKill() || Dead; } // Handle subregister index. MO.setReg(PhysReg ? 
TRI->getSubReg(PhysReg, MO.getSubReg()) : 0); - MO.setIsRenamableIfNoExtraRegAllocReq(); + MO.setIsRenamable(true); MO.setSubReg(0); // A kill flag implies killing the full register. Add corresponding super @@ -727,7 +730,7 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, unsigned OpNum, // there are additional physreg defines. void RegAllocFast::handleThroughOperands(MachineInstr &MI, SmallVectorImpl<unsigned> &VirtDead) { - DEBUG(dbgs() << "Scanning for through registers:"); + LLVM_DEBUG(dbgs() << "Scanning for through registers:"); SmallSet<unsigned, 8> ThroughRegs; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; @@ -737,13 +740,13 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) || (MO.getSubReg() && MI.readsVirtualRegister(Reg))) { if (ThroughRegs.insert(Reg).second) - DEBUG(dbgs() << ' ' << printReg(Reg)); + LLVM_DEBUG(dbgs() << ' ' << printReg(Reg)); } } // If any physreg defines collide with preallocated through registers, // we must spill and reallocate. - DEBUG(dbgs() << "\nChecking for physdef collisions.\n"); + LLVM_DEBUG(dbgs() << "\nChecking for physdef collisions.\n"); for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); @@ -756,7 +759,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, } SmallVector<unsigned, 8> PartialDefs; - DEBUG(dbgs() << "Allocating tied uses.\n"); + LLVM_DEBUG(dbgs() << "Allocating tied uses.\n"); for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; @@ -764,15 +767,16 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isUse()) { if (!MO.isTied()) continue; - DEBUG(dbgs() << "Operand " << I << "("<< MO << ") is tied to operand " - << MI.findTiedOperandIdx(I) << ".\n"); + LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO + << ") is tied to operand " << MI.findTiedOperandIdx(I) + << ".\n"); LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0); MCPhysReg PhysReg = LRI->PhysReg; setPhysReg(MI, I, PhysReg); // Note: we don't update the def operand yet. That would cause the normal // def-scan to attempt spilling. } else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) { - DEBUG(dbgs() << "Partial redefine: " << MO << "\n"); + LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << "\n"); // Reload the register, but don't assign to the operand just yet. // That would confuse the later phys-def processing pass. 
LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0); @@ -780,7 +784,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, } } - DEBUG(dbgs() << "Allocating early clobbers.\n"); + LLVM_DEBUG(dbgs() << "Allocating early clobbers.\n"); for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; @@ -801,8 +805,8 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI) - << " as used in instr\n"); + LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI) + << " as used in instr\n"); markRegUsedInInstr(Reg); } @@ -848,7 +852,7 @@ void RegAllocFast::dumpState() { void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { this->MBB = &MBB; - DEBUG(dbgs() << "\nAllocating " << MBB); + LLVM_DEBUG(dbgs() << "\nAllocating " << MBB); PhysRegState.assign(TRI->getNumRegs(), regDisabled); assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); @@ -866,10 +870,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { // Otherwise, sequentially allocate each instruction in the MBB. for (MachineInstr &MI : MBB) { const MCInstrDesc &MCID = MI.getDesc(); - DEBUG( - dbgs() << "\n>> " << MI << "Regs:"; - dumpState() - ); + LLVM_DEBUG(dbgs() << "\n>> " << MI << "Regs:"; dumpState()); // Debug values are not allowed to change codegen in any way. if (MI.isDebugValue()) { @@ -894,13 +895,13 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { if (SS != -1) { // Modify DBG_VALUE now that the value is in a spill slot. updateDbgValueForSpill(*DebugMI, SS); - DEBUG(dbgs() << "Modifying debug info due to spill:" - << "\t" << *DebugMI); + LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:" + << "\t" << *DebugMI); continue; } // We can't allocate a physreg for a DebugValue, sorry! - DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); + LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); MO.setReg(0); } @@ -910,6 +911,9 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { continue; } + if (MI.isDebugLabel()) + continue; + // If this is a copy, we may be able to coalesce. unsigned CopySrcReg = 0; unsigned CopyDstReg = 0; @@ -1025,7 +1029,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { // as call-clobbered, this is not correct because some of those // definitions may be used later on and we do not want to reuse // those for virtual registers in between. - DEBUG(dbgs() << " Spilling remaining registers before call.\n"); + LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n"); spillAll(MI); } @@ -1060,15 +1064,15 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { VirtDead.clear(); if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) { - DEBUG(dbgs() << "-- coalescing: " << MI); + LLVM_DEBUG(dbgs() << "-- coalescing: " << MI); Coalesced.push_back(&MI); } else { - DEBUG(dbgs() << "<< " << MI); + LLVM_DEBUG(dbgs() << "<< " << MI); } } // Spill all physical registers holding virtual registers now. - DEBUG(dbgs() << "Spilling live registers at end of block.\n"); + LLVM_DEBUG(dbgs() << "Spilling live registers at end of block.\n"); spillAll(MBB.getFirstTerminator()); // Erase all the coalesced copies. 
We are delaying it until now because @@ -1077,13 +1081,13 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { MBB.erase(MI); NumCopies += Coalesced.size(); - DEBUG(MBB.dump()); + LLVM_DEBUG(MBB.dump()); } /// Allocates registers for a function. bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" - << "********** Function: " << MF.getName() << '\n'); + LLVM_DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" + << "********** Function: " << MF.getName() << '\n'); MRI = &MF.getRegInfo(); const TargetSubtargetInfo &STI = MF.getSubtarget(); TRI = STI.getRegisterInfo(); diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp index e492c481a540..3333e1f2fb8b 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -125,6 +125,12 @@ static cl::opt<bool> EnableDeferredSpilling( "variable because of other evicted variables."), cl::init(false)); +static cl::opt<unsigned> + HugeSizeForSplit("huge-size-for-split", cl::Hidden, + cl::desc("A threshold of live range size which may cause " + "high compile time cost in global splitting."), + cl::init(5000)); + // FIXME: Find a good default for this flag and remove the flag. static cl::opt<unsigned> CSRFirstTimeCost("regalloc-csr-first-time-cost", @@ -292,7 +298,7 @@ class RAGreedy : public MachineFunctionPass, public: using EvictorInfo = std::pair<unsigned /* evictor */, unsigned /* physreg */>; - using EvicteeInfo = llvm::MapVector<unsigned /* evictee */, EvictorInfo>; + using EvicteeInfo = llvm::DenseMap<unsigned /* evictee */, EvictorInfo>; private: /// Each Vreg that has been evicted in the last stage of selectOrSplit will @@ -300,28 +306,28 @@ class RAGreedy : public MachineFunctionPass, EvicteeInfo Evictees; public: - /// \brief Clear all eviction information. + /// Clear all eviction information. void clear() { Evictees.clear(); } - /// \brief Clear eviction information for the given evictee Vreg. + /// Clear eviction information for the given evictee Vreg. /// E.g. when Vreg get's a new allocation, the old eviction info is no /// longer relevant. /// \param Evictee The evictee Vreg for whom we want to clear collected /// eviction info. void clearEvicteeInfo(unsigned Evictee) { Evictees.erase(Evictee); } - /// \brief Track new eviction. + /// Track new eviction. /// The Evictor vreg has evicted the Evictee vreg from Physreg. - /// \praram PhysReg The phisical register Evictee was evicted from. - /// \praram Evictor The evictor Vreg that evicted Evictee. - /// \praram Evictee The evictee Vreg. + /// \param PhysReg The phisical register Evictee was evicted from. + /// \param Evictor The evictor Vreg that evicted Evictee. + /// \param Evictee The evictee Vreg. void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) { Evictees[Evictee].first = Evictor; Evictees[Evictee].second = PhysReg; } /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg. - /// \praram Evictee The evictee vreg. + /// \param Evictee The evictee vreg. /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if /// nobody has evicted Evictee from PhysReg. EvictorInfo getEvictor(unsigned Evictee) { @@ -399,7 +405,7 @@ class RAGreedy : public MachineFunctionPass, /// obtained from the TargetSubtargetInfo. 
bool EnableLocalReassign; - /// Enable or not the the consideration of the cost of local intervals created + /// Enable or not the consideration of the cost of local intervals created /// by a split candidate when choosing the best split candidate. bool EnableAdvancedRASplitCost; @@ -448,13 +454,16 @@ private: bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand, unsigned BBNumber, const AllocationOrder &Order); + bool splitCanCauseLocalSpill(unsigned VirtRegToSplit, + GlobalSplitCandidate &Cand, unsigned BBNumber, + const AllocationOrder &Order); BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &, const AllocationOrder &Order, bool *CanCauseEvictionChain); bool calcCompactRegion(GlobalSplitCandidate&); void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); void calcGapWeights(unsigned, SmallVectorImpl<float>&); - unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg); + unsigned canReassign(LiveInterval &VirtReg, unsigned PrevReg); bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); bool canEvictInterferenceInRange(LiveInterval &VirtReg, unsigned PhysReg, @@ -475,6 +484,7 @@ private: SmallVectorImpl<unsigned>&, unsigned = ~0u); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<unsigned>&); + unsigned isSplitBenefitWorthCost(LiveInterval &VirtReg); /// Calculate cost of region splitting. unsigned calculateRegionSplitCost(LiveInterval &VirtReg, AllocationOrder &Order, @@ -763,7 +773,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, // preferred register. if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) if (Order.isHint(Hint)) { - DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n'); + LLVM_DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n'); EvictionCost MaxCost; MaxCost.setBrokenHints(1); if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { @@ -782,8 +792,8 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, if (!Cost) return PhysReg; - DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost " << Cost - << '\n'); + LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost " + << Cost << '\n'); unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost); return CheapReg ? CheapReg : PhysReg; } @@ -811,9 +821,9 @@ unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) { break; } if (PhysReg) - DEBUG(dbgs() << "can reassign: " << VirtReg << " from " - << printReg(PrevReg, TRI) << " to " << printReg(PhysReg, TRI) - << '\n'); + LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from " + << printReg(PrevReg, TRI) << " to " + << printReg(PhysReg, TRI) << '\n'); return PhysReg; } @@ -840,7 +850,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, return true; if (A.weight > B.weight) { - DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n'); + LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n'); return true; } return false; @@ -934,7 +944,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, return true; } -/// \brief Return true if all interferences between VirtReg and PhysReg between +/// Return true if all interferences between VirtReg and PhysReg between /// Start and End can be evicted. /// /// \param VirtReg Live range that is about to be assigned. 
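The eviction bookkeeping rewritten in the RegAllocGreedy.cpp hunks above maps each evictee vreg to the vreg that evicted it and the physreg it was evicted from. A self-contained miniature of that interface (the struct name is made up; the real code keeps this state inside the allocator):

#include "llvm/ADT/DenseMap.h"
#include <utility>

// Miniature of the addEviction/getEvictor/clearEvicteeInfo pattern shown in
// the hunks above; getEvictor returns {0, 0} when nothing has been recorded,
// matching the documented "0 if nobody has evicted Evictee" contract.
struct EvictionTrackSketch {
  using EvictorInfo = std::pair<unsigned, unsigned>; // (evictor vreg, physreg)
  llvm::DenseMap<unsigned, EvictorInfo> Evictees;

  void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) {
    Evictees[Evictee] = {Evictor, PhysReg};
  }
  EvictorInfo getEvictor(unsigned Evictee) {
    auto It = Evictees.find(Evictee);
    return It == Evictees.end() ? EvictorInfo(0, 0) : It->second;
  }
  void clearEvicteeInfo(unsigned Evictee) { Evictees.erase(Evictee); }
};
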
@@ -986,7 +996,7 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg, return true; } -/// \brief Return tthe physical register that will be best +/// Return the physical register that will be best /// candidate for eviction by a local split interval that will be created /// between Start and End. /// @@ -1032,8 +1042,8 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, if (!Cascade) Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++; - DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI) - << " interference: Cascade " << Cascade << '\n'); + LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI) + << " interference: Cascade " << Cascade << '\n'); // Collect all interfering virtregs first. SmallVector<LiveInterval*, 8> Intfs; @@ -1104,8 +1114,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg); unsigned MinCost = RegClassInfo.getMinCost(RC); if (MinCost >= CostPerUseLimit) { - DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost - << ", no cheaper registers to be found.\n"); + LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " + << MinCost << ", no cheaper registers to be found.\n"); return 0; } @@ -1113,7 +1123,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, // the same cost. We don't need to look at them if they're too expensive. if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) { OrderLimit = RegClassInfo.getLastCostChange(RC); - DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n"); + LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit + << " regs.\n"); } } @@ -1124,9 +1135,10 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, // The first use of a callee-saved register in a function has cost 1. // Don't start using a CSR when the CostPerUseLimit is low. if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) { - DEBUG(dbgs() << printReg(PhysReg, TRI) << " would clobber CSR " - << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI) - << '\n'); + LLVM_DEBUG( + dbgs() << printReg(PhysReg, TRI) << " would clobber CSR " + << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI) + << '\n'); continue; } @@ -1313,7 +1325,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { // Perhaps iterating can enable more bundles? SpillPlacer->iterate(); } - DEBUG(dbgs() << ", v=" << Visited); + LLVM_DEBUG(dbgs() << ", v=" << Visited); } /// calcCompactRegion - Compute the set of edge bundles that should be live @@ -1331,7 +1343,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { // Compact regions don't correspond to any physreg. Cand.reset(IntfCache, 0); - DEBUG(dbgs() << "Compact region bundles"); + LLVM_DEBUG(dbgs() << "Compact region bundles"); // Use the spill placer to determine the live bundles. GrowRegion pretends // that all the through blocks have interference when PhysReg is unset. @@ -1340,7 +1352,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { // The static split cost will be zero since Cand.Intf reports no interference. 
BlockFrequency Cost; if (!addSplitConstraints(Cand.Intf, Cost)) { - DEBUG(dbgs() << ", none.\n"); + LLVM_DEBUG(dbgs() << ", none.\n"); return false; } @@ -1348,11 +1360,11 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { SpillPlacer->finish(); if (!Cand.LiveBundles.any()) { - DEBUG(dbgs() << ", none.\n"); + LLVM_DEBUG(dbgs() << ", none.\n"); return false; } - DEBUG({ + LLVM_DEBUG({ for (int i : Cand.LiveBundles.set_bits()) dbgs() << " EB#" << i; dbgs() << ".\n"; @@ -1378,7 +1390,7 @@ BlockFrequency RAGreedy::calcSpillCost() { return Cost; } -/// \brief Check if splitting Evictee will create a local split interval in +/// Check if splitting Evictee will create a local split interval in /// basic block number BBNumber that may cause a bad eviction chain. This is /// intended to prevent bad eviction sequences like: /// movl %ebp, 8(%esp) # 4-byte Spill @@ -1401,7 +1413,7 @@ BlockFrequency RAGreedy::calcSpillCost() { /// Evictee %0 is intended for region splitting with split candidate /// physreg0 (the reg %0 was evicted from). /// Region splitting creates a local interval because of interference with the -/// evictor %1 (normally region spliitting creates 2 interval, the "by reg" +/// evictor %1 (normally region splitting creates 2 interval, the "by reg" /// and "by stack" intervals and local interval created when interference /// occurs). /// One of the split intervals ends up evicting %2 from physreg1. @@ -1427,7 +1439,7 @@ BlockFrequency RAGreedy::calcSpillCost() { /// we are splitting for and the interferences. /// \param BBNumber The number of a BB for which the region split process will /// create a local split interval. -/// \param Order The phisical registers that may get evicted by a split +/// \param Order The physical registers that may get evicted by a split /// artifact of Evictee. /// \return True if splitting Evictee may cause a bad eviction chain, false /// otherwise. @@ -1448,8 +1460,8 @@ bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee, getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee), Cand.Intf.first(), Cand.Intf.last(), &MaxWeight); - // The bad eviction chain occurs when either the split candidate the the - // evited reg or one of the split artifact will evict the evicting reg. + // The bad eviction chain occurs when either the split candidate is the + // evicting reg or one of the split artifact will evict the evicting reg. if ((PhysReg != Cand.PhysReg) && (PhysReg != FutureEvictedPhysReg)) return false; @@ -1479,6 +1491,54 @@ bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee, return true; } +/// Check if splitting VirtRegToSplit will create a local split interval +/// in basic block number BBNumber that may cause a spill. +/// +/// \param VirtRegToSplit The register considered to be split. +/// \param Cand The split candidate that determines the physical +/// register we are splitting for and the interferences. +/// \param BBNumber The number of a BB for which the region split process +/// will create a local split interval. +/// \param Order The physical registers that may get evicted by a +/// split artifact of VirtRegToSplit. +/// \return True if splitting VirtRegToSplit may cause a spill, false +/// otherwise. +bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit, + GlobalSplitCandidate &Cand, + unsigned BBNumber, + const AllocationOrder &Order) { + Cand.Intf.moveToBlock(BBNumber); + + // Check if the local interval will find a non interfereing assignment. 
+ for (auto PhysReg : Order.getOrder()) { + if (!Matrix->checkInterference(Cand.Intf.first().getPrevIndex(), + Cand.Intf.last(), PhysReg)) + return false; + } + + // Check if the local interval will evict a cheaper interval. + float CheapestEvictWeight = 0; + unsigned FutureEvictedPhysReg = getCheapestEvicteeWeight( + Order, LIS->getInterval(VirtRegToSplit), Cand.Intf.first(), + Cand.Intf.last(), &CheapestEvictWeight); + + // Have we found an interval that can be evicted? + if (FutureEvictedPhysReg) { + VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis<MachineLoopInfo>(), *MBFI); + float splitArtifactWeight = + VRAI.futureWeight(LIS->getInterval(VirtRegToSplit), + Cand.Intf.first().getPrevIndex(), Cand.Intf.last()); + // Will the weight of the local interval be higher than the cheapest evictee + // weight? If so it will evict it and will not cause a spill. + if (splitArtifactWeight >= 0 && splitArtifactWeight > CheapestEvictWeight) + return false; + } + + // The local interval is not able to find non interferencing assignment and + // not able to evict a less worthy interval, therfore, it can cause a spill. + return true; +} + /// calcGlobalSplitCost - Return the global split cost of following the split /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints. @@ -1499,19 +1559,26 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, Cand.Intf.moveToBlock(BC.Number); // Check wheather a local interval is going to be created during the region - // split. - if (EnableAdvancedRASplitCost && CanCauseEvictionChain && - Cand.Intf.hasInterference() && BI.LiveIn && BI.LiveOut && RegIn && - RegOut) { - - if (splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) { - // This interfernce cause our eviction from this assignment, we might - // evict somebody else, add that cost. + // split. Calculate adavanced spilt cost (cost of local intervals) if option + // is enabled. + if (EnableAdvancedRASplitCost && Cand.Intf.hasInterference() && BI.LiveIn && + BI.LiveOut && RegIn && RegOut) { + + if (CanCauseEvictionChain && + splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) { + // This interference causes our eviction from this assignment, we might + // evict somebody else and eventually someone will spill, add that cost. // See splitCanCauseEvictionChain for detailed description of scenarios. GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); *CanCauseEvictionChain = true; + + } else if (splitCanCauseLocalSpill(VirtRegToSplit, Cand, BC.Number, + Order)) { + // This interference causes local interval to spill, add that cost. + GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); + GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); } } @@ -1540,7 +1607,7 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, // region split. if (EnableAdvancedRASplitCost && CanCauseEvictionChain && splitCanCauseEvictionChain(VirtRegToSplit, Cand, Number, Order)) { - // This interfernce cause our eviction from this assignment, we might + // This interference cause our eviction from this assignment, we might // evict somebody else, add that cost. // See splitCanCauseEvictionChain for detailed description of // scenarios. @@ -1575,7 +1642,8 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, // These are the intervals created for new global ranges. We may create more // intervals for local ranges. 
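The calcGlobalSplitCost changes in the hunk above fold both heuristics into one cost model: if a region split would leave an interfering local interval in a block, and that interval would either spill (splitCanCauseLocalSpill) or set off an eviction chain, the candidate is penalised. Roughly, as a fragment sketch using the names from the hunk (not standalone code):

    // The penalty is two block-frequency units, i.e. about two extra copies or
    // a spill/reload pair executed at the block's frequency.
    BlockFrequency Freq = SpillPlacer->getBlockFrequency(BC.Number);
    GlobalCost += Freq;   // entering the local split artifact
    GlobalCost += Freq;   // leaving it again (copy back, spill, or eviction)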
const unsigned NumGlobalIntvs = LREdit.size(); - DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n"); + LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs + << " globals.\n"); assert(NumGlobalIntvs && "No global intervals configured"); // Isolate even single instructions when dealing with a proper sub-class. @@ -1612,7 +1680,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, // Create separate intervals for isolated blocks with multiple uses. if (!IntvIn && !IntvOut) { - DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n"); + LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n"); if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) SE->splitSingleBlock(BI); continue; @@ -1694,8 +1762,8 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, // blocks is strictly decreasing. if (IntvMap[i] < NumGlobalIntvs) { if (SA->countLiveBlocks(&Reg) >= OrigBlocks) { - DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks - << " blocks as original.\n"); + LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks + << " blocks as original.\n"); // Don't allow repeated splitting as a safe guard against looping. setStage(Reg, RS_Split2); } @@ -1710,8 +1778,21 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, MF->verify(this, "After splitting live range around region"); } +// Global split has high compile time cost especially for large live range. +// Return false for the case here where the potential benefit will never +// worth the cost. +unsigned RAGreedy::isSplitBenefitWorthCost(LiveInterval &VirtReg) { + MachineInstr *MI = MRI->getUniqueVRegDef(VirtReg.reg); + if (MI && TII->isTriviallyReMaterializable(*MI, AA) && + VirtReg.size() > HugeSizeForSplit) + return false; + return true; +} + unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<unsigned> &NewVRegs) { + if (!isSplitBenefitWorthCost(VirtReg)) + return 0; unsigned NumCands = 0; BlockFrequency SpillCost = calcSpillCost(); BlockFrequency BestCost; @@ -1726,8 +1807,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, // No benefit from the compact region, our fallback will be per-block // splitting. Make sure we find a solution that is cheaper than spilling. BestCost = SpillCost; - DEBUG(dbgs() << "Cost of isolating all blocks = "; - MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); + LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = "; + MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); } bool CanCauseEvictionChain = false; @@ -1790,13 +1871,13 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, SpillPlacer->prepare(Cand.LiveBundles); BlockFrequency Cost; if (!addSplitConstraints(Cand.Intf, Cost)) { - DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n"); + LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n"); continue; } - DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = "; - MBFI->printBlockFreq(dbgs(), Cost)); + LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = "; + MBFI->printBlockFreq(dbgs(), Cost)); if (Cost >= BestCost) { - DEBUG({ + LLVM_DEBUG({ if (BestCand == NoCand) dbgs() << " worse than no bundles\n"; else @@ -1811,15 +1892,15 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, // No live bundles, defer to splitSingleBlocks(). 
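The new isSplitBenefitWorthCost gate above skips the expensive global-split search for huge live ranges that are trivially rematerializable, since those are nearly free to spill anyway, and the threshold comes from the hidden huge-size-for-split option. A small self-contained sketch of the pattern (the helper below is illustrative, not the pass code):

    #include "llvm/Support/CommandLine.h"

    using namespace llvm;

    static cl::opt<unsigned> HugeSizeForSplit(
        "huge-size-for-split", cl::Hidden,
        cl::desc("Live range size above which global splitting is considered "
                 "too expensive"),
        cl::init(5000));

    // Mirrors the added heuristic: only skip region splitting when the range is
    // both huge and trivially rematerializable.
    static bool splitBenefitWorthCost(unsigned LiveRangeSize,
                                      bool TriviallyRematerializable) {
      return !(TriviallyRematerializable && LiveRangeSize > HugeSizeForSplit);
    }

Being a cl::opt, the threshold can be tuned on the command line when tracking down compile-time regressions in global splitting.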
if (!Cand.LiveBundles.any()) { - DEBUG(dbgs() << " no bundles.\n"); + LLVM_DEBUG(dbgs() << " no bundles.\n"); continue; } bool HasEvictionChain = false; Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain); - DEBUG({ - dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) - << " with bundles"; + LLVM_DEBUG({ + dbgs() << ", total = "; + MBFI->printBlockFreq(dbgs(), Cost) << " with bundles"; for (int i : Cand.LiveBundles.set_bits()) dbgs() << " EB#" << i; dbgs() << ".\n"; @@ -1838,11 +1919,11 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, if (CanCauseEvictionChain && BestCand != NoCand) { // See splitCanCauseEvictionChain for detailed description of bad // eviction chain scenarios. - DEBUG(dbgs() << "Best split candidate of vreg " - << printReg(VirtReg.reg, TRI) << " may "); + LLVM_DEBUG(dbgs() << "Best split candidate of vreg " + << printReg(VirtReg.reg, TRI) << " may "); if (!(*CanCauseEvictionChain)) - DEBUG(dbgs() << "not "); - DEBUG(dbgs() << "cause bad eviction chain\n"); + LLVM_DEBUG(dbgs() << "not "); + LLVM_DEBUG(dbgs() << "cause bad eviction chain\n"); } return BestCand; @@ -1865,8 +1946,8 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, if (unsigned B = Cand.getBundles(BundleCand, BestCand)) { UsedCands.push_back(BestCand); Cand.IntvIdx = SE->openIntv(); - DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in " - << B << " bundles, intv " << Cand.IntvIdx << ".\n"); + LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in " + << B << " bundles, intv " << Cand.IntvIdx << ".\n"); (void)B; } } @@ -1878,8 +1959,8 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, if (unsigned B = Cand.getBundles(BundleCand, 0)) { UsedCands.push_back(0); Cand.IntvIdx = SE->openIntv(); - DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv " - << Cand.IntvIdx << ".\n"); + LLVM_DEBUG(dbgs() << "Split for compact region in " << B + << " bundles, intv " << Cand.IntvIdx << ".\n"); (void)B; } } @@ -1978,7 +2059,8 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, if (Uses.size() <= 1) return 0; - DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); + LLVM_DEBUG(dbgs() << "Split around " << Uses.size() + << " individual instrs.\n"); const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC, *MF); @@ -1993,7 +2075,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SuperRCNumAllocatableRegs == getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII, TRI, RCI)) { - DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); + LLVM_DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); continue; } SE->openIntv(); @@ -2003,7 +2085,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, } if (LREdit.empty()) { - DEBUG(dbgs() << "All uses were copies.\n"); + LLVM_DEBUG(dbgs() << "All uses were copies.\n"); return 0; } @@ -2121,7 +2203,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, return 0; const unsigned NumGaps = Uses.size()-1; - DEBUG({ + LLVM_DEBUG({ dbgs() << "tryLocalSplit: "; for (unsigned i = 0, e = Uses.size(); i != e; ++i) dbgs() << ' ' << Uses[i]; @@ -2134,7 +2216,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, if (Matrix->checkRegMaskInterference(VirtReg)) { // Get regmask slots for the whole block. 
ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber()); - DEBUG(dbgs() << RMS.size() << " regmasks in block:"); + LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:"); // Constrain to VirtReg's live range. unsigned ri = std::lower_bound(RMS.begin(), RMS.end(), Uses.front().getRegSlot()) - RMS.begin(); @@ -2148,14 +2230,15 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // overlap the live range. if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps) break; - DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]); + LLVM_DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' + << Uses[i + 1]); RegMaskGaps.push_back(i); // Advance ri to the next gap. A regmask on one of the uses counts in // both gaps. while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1])) ++ri; } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); } // Since we allow local split results to be split again, there is a risk of @@ -2214,13 +2297,12 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, const bool LiveBefore = SplitBefore != 0 || BI.LiveIn; const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut; - DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' - << Uses[SplitBefore] << '-' << Uses[SplitAfter] - << " i=" << MaxGap); + LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore] + << '-' << Uses[SplitAfter] << " i=" << MaxGap); // Stop before the interval gets so big we wouldn't be making progress. if (!LiveBefore && !LiveAfter) { - DEBUG(dbgs() << " all\n"); + LLVM_DEBUG(dbgs() << " all\n"); break; } // Should the interval be extended or shrunk? @@ -2245,12 +2327,12 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, 1); // Would this split be possible to allocate? // Never allocate all gaps, we wouldn't be making progress. - DEBUG(dbgs() << " w=" << EstWeight); + LLVM_DEBUG(dbgs() << " w=" << EstWeight); if (EstWeight * Hysteresis >= MaxGap) { Shrink = false; float Diff = EstWeight - MaxGap; if (Diff > BestDiff) { - DEBUG(dbgs() << " (best)"); + LLVM_DEBUG(dbgs() << " (best)"); BestDiff = Hysteresis * Diff; BestBefore = SplitBefore; BestAfter = SplitAfter; @@ -2261,7 +2343,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Try to shrink. if (Shrink) { if (++SplitBefore < SplitAfter) { - DEBUG(dbgs() << " shrink\n"); + LLVM_DEBUG(dbgs() << " shrink\n"); // Recompute the max when necessary. if (GapWeight[SplitBefore - 1] >= MaxGap) { MaxGap = GapWeight[SplitBefore]; @@ -2275,11 +2357,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Try to extend the interval. 
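The gap-splitting loop above compares floating-point gap weights with a hysteresis factor, so that two nearly equal costs do not flip the shrink/extend decision on every iteration. An illustrative helper, with the exact damping constant assumed rather than taken from the source:

    // Challenger only wins if it beats the incumbent by a small margin.
    static bool beatsWithHysteresis(float Challenger, float Incumbent,
                                    float Hysteresis = 0.98f) {
      return Challenger * Hysteresis >= Incumbent;
    }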
if (SplitAfter >= NumGaps) { - DEBUG(dbgs() << " end\n"); + LLVM_DEBUG(dbgs() << " end\n"); break; } - DEBUG(dbgs() << " extend\n"); + LLVM_DEBUG(dbgs() << " extend\n"); MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]); } } @@ -2288,9 +2370,9 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, if (BestBefore == NumGaps) return 0; - DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] - << '-' << Uses[BestAfter] << ", " << BestDiff - << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); + LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-' + << Uses[BestAfter] << ", " << BestDiff << ", " + << (BestAfter - BestBefore + 1) << " instrs\n"); LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); SE->reset(LREdit); @@ -2310,14 +2392,14 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, bool LiveAfter = BestAfter != NumGaps || BI.LiveOut; unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter; if (NewGaps >= NumGaps) { - DEBUG(dbgs() << "Tagging non-progress ranges: "); + LLVM_DEBUG(dbgs() << "Tagging non-progress ranges: "); assert(!ProgressRequired && "Didn't make progress when it was required."); for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) if (IntvMap[i] == 1) { setStage(LIS->getInterval(LREdit.get(i)), RS_Split2); - DEBUG(dbgs() << printReg(LREdit.get(i))); + LLVM_DEBUG(dbgs() << printReg(LREdit.get(i))); } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); } ++NumLocalSplits; @@ -2410,7 +2492,7 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, // chances are one would not be recolorable. if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >= LastChanceRecoloringMaxInterference && !ExhaustiveSearch) { - DEBUG(dbgs() << "Early abort: too many interferences.\n"); + LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n"); CutOffInfo |= CO_Interf; return false; } @@ -2424,7 +2506,8 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, MRI->getRegClass(Intf->reg) == CurRC) && !(hasTiedDef(MRI, VirtReg.reg) && !hasTiedDef(MRI, Intf->reg))) || FixedRegisters.count(Intf->reg)) { - DEBUG(dbgs() << "Early abort: the interference is not recolorable.\n"); + LLVM_DEBUG( + dbgs() << "Early abort: the interference is not recolorable.\n"); return false; } RecoloringCandidates.insert(Intf); @@ -2477,7 +2560,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, SmallVectorImpl<unsigned> &NewVRegs, SmallVirtRegSet &FixedRegisters, unsigned Depth) { - DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); + LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); // Ranges must be Done. assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) && "Last chance recoloring should really be last chance"); @@ -2486,7 +2569,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // for target with hundreds of registers. // Indeed, in that case we may want to cut the search space earlier. 
if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) { - DEBUG(dbgs() << "Abort because max depth has been reached.\n"); + LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n"); CutOffInfo |= CO_Depth; return ~0u; } @@ -2503,8 +2586,8 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, Order.rewind(); while (unsigned PhysReg = Order.next()) { - DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " - << printReg(PhysReg, TRI) << '\n'); + LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " + << printReg(PhysReg, TRI) << '\n'); RecoloringCandidates.clear(); VirtRegToPhysReg.clear(); CurrentNewVRegs.clear(); @@ -2512,7 +2595,8 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // It is only possible to recolor virtual register interference. if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) { - DEBUG(dbgs() << "Some interferences are not with virtual registers.\n"); + LLVM_DEBUG( + dbgs() << "Some interferences are not with virtual registers.\n"); continue; } @@ -2521,7 +2605,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // the interferences. if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates, FixedRegisters)) { - DEBUG(dbgs() << "Some interferences cannot be recolored.\n"); + LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n"); continue; } @@ -2535,7 +2619,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, unsigned ItVirtReg = (*It)->reg; enqueue(RecoloringQueue, *It); assert(VRM->hasPhys(ItVirtReg) && - "Interferences are supposed to be with allocated vairables"); + "Interferences are supposed to be with allocated variables"); // Record the current allocation. VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg); @@ -2563,8 +2647,8 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, return PhysReg; } - DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to " - << printReg(PhysReg, TRI) << '\n'); + LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to " + << printReg(PhysReg, TRI) << '\n'); // The recoloring attempt failed, undo the changes. FixedRegisters = SaveFixedRegisters; @@ -2611,7 +2695,7 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, unsigned Depth) { while (!RecoloringQueue.empty()) { LiveInterval *LI = dequeue(RecoloringQueue); - DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); + LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); unsigned PhysReg; PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1); // When splitting happens, the live-range may actually be empty. @@ -2623,11 +2707,12 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, if (!PhysReg) { assert(LI->empty() && "Only empty live-range do not require a register"); - DEBUG(dbgs() << "Recoloring of " << *LI << " succeeded. Empty LI.\n"); + LLVM_DEBUG(dbgs() << "Recoloring of " << *LI + << " succeeded. Empty LI.\n"); continue; } - DEBUG(dbgs() << "Recoloring of " << *LI - << " succeeded with: " << printReg(PhysReg, TRI) << '\n'); + LLVM_DEBUG(dbgs() << "Recoloring of " << *LI + << " succeeded with: " << printReg(PhysReg, TRI) << '\n'); Matrix->assign(*LI, PhysReg); FixedRegisters.insert(LI->reg); @@ -2735,7 +2820,7 @@ void RAGreedy::initializeCSRCost() { CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry); } -/// \brief Collect the hint info for \p Reg. +/// Collect the hint info for \p Reg. /// The results are stored into \p Out. 
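Last-chance recoloring, shown in the hunks above, is a recursive search, so it is bounded both by recursion depth and by the number of interferences it will consider, unless an exhaustive search was explicitly requested. A fragment sketch of the cutoff bookkeeping, using the names from the hunk (the concrete limits are command-line options with small defaults):

    if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
      CutOffInfo |= CO_Depth;  // remember which limit stopped the search
      return ~0u;              // give up on this candidate without forcing a spill
    }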
/// \p Out is not cleared before being populated. void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { @@ -2759,7 +2844,7 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { } } -/// \brief Using the given \p List, compute the cost of the broken hints if +/// Using the given \p List, compute the cost of the broken hints if /// \p PhysReg was used. /// \return The cost of \p List for \p PhysReg. BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List, @@ -2772,7 +2857,7 @@ BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List, return Cost; } -/// \brief Using the register assigned to \p VirtReg, try to recolor +/// Using the register assigned to \p VirtReg, try to recolor /// all the live ranges that are copy-related with \p VirtReg. /// The recoloring is then propagated to all the live-ranges that have /// been recolored and so on, until no more copies can be coalesced or @@ -2794,8 +2879,8 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { Visited.insert(Reg); RecoloringCandidates.push_back(Reg); - DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI) << '(' - << printReg(PhysReg, TRI) << ")\n"); + LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI) + << '(' << printReg(PhysReg, TRI) << ")\n"); do { Reg = RecoloringCandidates.pop_back_val(); @@ -2816,8 +2901,8 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { Matrix->checkInterference(LI, PhysReg))) continue; - DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI) - << ") is recolorable.\n"); + LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI) + << ") is recolorable.\n"); // Gather the hint info. Info.clear(); @@ -2825,19 +2910,20 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { // Check if recoloring the live-range will increase the cost of the // non-identity copies. if (CurrPhys != PhysReg) { - DEBUG(dbgs() << "Checking profitability:\n"); + LLVM_DEBUG(dbgs() << "Checking profitability:\n"); BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys); BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg); - DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency() - << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n'); + LLVM_DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency() + << "\nNew Cost: " << NewCopiesCost.getFrequency() + << '\n'); if (OldCopiesCost < NewCopiesCost) { - DEBUG(dbgs() << "=> Not profitable.\n"); + LLVM_DEBUG(dbgs() << "=> Not profitable.\n"); continue; } // At this point, the cost is either cheaper or equal. If it is // equal, we consider this is profitable because it may expose // more recoloring opportunities. - DEBUG(dbgs() << "=> Profitable.\n"); + LLVM_DEBUG(dbgs() << "=> Profitable.\n"); // Recolor the live-range. Matrix->unassign(LI); Matrix->assign(LI, PhysReg); @@ -2851,7 +2937,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { } while (!RecoloringCandidates.empty()); } -/// \brief Try to recolor broken hints. +/// Try to recolor broken hints. /// Broken hints may be repaired by recoloring when an evicted variable /// freed up a register for a larger live-range. 
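The hint-recoloring logic in the hunk above walks the copy-related web of a newly assigned vreg and recolors members toward the same physreg, but only when the broken-hint cost does not get worse. A fragment sketch of that profitability test, with names from the hunk:

    // Compare the block-frequency weighted cost of the copies that stay
    // non-identity under each choice of physical register.
    BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
    BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
    if (OldCopiesCost < NewCopiesCost)
      continue;                 // recoloring would break more valuable hints
    Matrix->unassign(LI);       // equal or cheaper: recolor toward the hint
    Matrix->assign(LI, PhysReg);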
/// Consider the following example: @@ -2925,8 +3011,8 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, } LiveRangeStage Stage = getStage(VirtReg); - DEBUG(dbgs() << StageName[Stage] - << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n'); + LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade " + << ExtraRegInfo[VirtReg.reg].Cascade << '\n'); // Try to evict a less worthy live range, but only for ranges from the primary // queue. The RS_Split ranges already failed to do this, and they should not @@ -2955,7 +3041,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // This gives a better picture of the interference to split around. if (Stage < RS_Split) { setStage(VirtReg, RS_Split); - DEBUG(dbgs() << "wait for second round\n"); + LLVM_DEBUG(dbgs() << "wait for second round\n"); NewVRegs.push_back(VirtReg.reg); return 0; } @@ -2984,7 +3070,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // We would need a deep integration with the spiller to do the // right thing here. Anyway, that is still good for early testing. setStage(VirtReg, RS_Memory); - DEBUG(dbgs() << "Do as if this register is in memory\n"); + LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n"); NewVRegs.push_back(VirtReg.reg); } else { NamedRegionTimer T("spill", "Spiller", TimerGroupName, @@ -3070,8 +3156,8 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads, } bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { - DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" - << "********** Function: " << mf.getName() << '\n'); + LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" + << "********** Function: " << mf.getName() << '\n'); MF = &mf; TRI = MF->getSubtarget().getRegisterInfo(); @@ -3106,7 +3192,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI); - DEBUG(LIS->dump()); + LLVM_DEBUG(LIS->dump()); SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI)); diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp index 69a879701fae..c19001c8403d 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -62,6 +62,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCRegisterInfo.h" @@ -159,25 +160,25 @@ private: /// always available for the remat of all the siblings of the original reg. SmallPtrSet<MachineInstr *, 32> DeadRemats; - /// \brief Finds the initial set of vreg intervals to allocate. + /// Finds the initial set of vreg intervals to allocate. void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS); - /// \brief Constructs an initial graph. + /// Constructs an initial graph. void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller); - /// \brief Spill the given VReg. + /// Spill the given VReg. void spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals, MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM, Spiller &VRegSpiller); - /// \brief Given a solved PBQP problem maps this solution back to a register + /// Given a solved PBQP problem maps this solution back to a register /// assignment. 
bool mapPBQPToRegAlloc(const PBQPRAGraph &G, const PBQP::Solution &Solution, VirtRegMap &VRM, Spiller &VRegSpiller); - /// \brief Postprocessing before final spilling. Sets basic block "live in" + /// Postprocessing before final spilling. Sets basic block "live in" /// variables. void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM) const; @@ -187,7 +188,7 @@ private: char RegAllocPBQP::ID = 0; -/// @brief Set spill costs for each node in the PBQP reg-alloc graph. +/// Set spill costs for each node in the PBQP reg-alloc graph. class SpillCosts : public PBQPRAConstraint { public: void apply(PBQPRAGraph &G) override { @@ -211,7 +212,7 @@ public: } }; -/// @brief Add interference edges between overlapping vregs. +/// Add interference edges between overlapping vregs. class Interference : public PBQPRAConstraint { private: using AllowedRegVecPtr = const PBQP::RegAlloc::AllowedRegVector *; @@ -561,16 +562,7 @@ void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF, unsigned Reg = TargetRegisterInfo::index2VirtReg(I); if (MRI.reg_nodbg_empty(Reg)) continue; - LiveInterval &LI = LIS.getInterval(Reg); - - // If this live interval is non-empty we will use pbqp to allocate it. - // Empty intervals we allocate in a simple post-processing stage in - // finalizeAlloc. - if (!LI.empty()) { - VRegsToAlloc.insert(LI.reg); - } else { - EmptyIntervalVRegs.insert(LI.reg); - } + VRegsToAlloc.insert(Reg); } } @@ -594,13 +586,24 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end()); + std::map<unsigned, std::vector<unsigned>> VRegAllowedMap; + while (!Worklist.empty()) { unsigned VReg = Worklist.back(); Worklist.pop_back(); - const TargetRegisterClass *TRC = MRI.getRegClass(VReg); LiveInterval &VRegLI = LIS.getInterval(VReg); + // If this is an empty interval move it to the EmptyIntervalVRegs set then + // continue. + if (VRegLI.empty()) { + EmptyIntervalVRegs.insert(VRegLI.reg); + VRegsToAlloc.erase(VRegLI.reg); + continue; + } + + const TargetRegisterClass *TRC = MRI.getRegClass(VReg); + // Record any overlaps with regmask operands. BitVector RegMaskOverlaps; LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps); @@ -639,8 +642,22 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller); Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end()); continue; + } else + VRegAllowedMap[VReg] = std::move(VRegAllowed); + } + + for (auto &KV : VRegAllowedMap) { + auto VReg = KV.first; + + // Move empty intervals to the EmptyIntervalVReg set. + if (LIS.getInterval(VReg).empty()) { + EmptyIntervalVRegs.insert(VReg); + VRegsToAlloc.erase(VReg); + continue; } + auto &VRegAllowed = KV.second; + PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0); // Tweak cost of callee saved registers, as using then force spilling and @@ -668,8 +685,8 @@ void RegAllocPBQP::spillVReg(unsigned VReg, const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); (void)TRI; - DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: " - << LRE.getParent().weight << ", New vregs: "); + LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: " + << LRE.getParent().weight << ", New vregs: "); // Copy any newly inserted live intervals into the list of regs to // allocate. 
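In the RegAllocPBQP hunks above, findVRegIntervalsToAlloc no longer partitions empty and non-empty intervals up front; initializeGraph does it while building the graph, apparently because spilling ranges with no allowed registers happens inside the same loop and can leave other intervals empty. A fragment sketch of the re-check that the restructured loop performs before turning allowed-register sets into PBQP nodes (names from the hunk):

    if (LIS.getInterval(VReg).empty()) {
      EmptyIntervalVRegs.insert(VReg);   // handled trivially in finalizeAlloc
      VRegsToAlloc.erase(VReg);
      continue;
    }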
@@ -677,11 +694,11 @@ void RegAllocPBQP::spillVReg(unsigned VReg, I != E; ++I) { const LiveInterval &LI = LIS.getInterval(*I); assert(!LI.empty() && "Empty spill range."); - DEBUG(dbgs() << printReg(LI.reg, &TRI) << " "); + LLVM_DEBUG(dbgs() << printReg(LI.reg, &TRI) << " "); VRegsToAlloc.insert(LI.reg); } - DEBUG(dbgs() << ")\n"); + LLVM_DEBUG(dbgs() << ")\n"); } bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G, @@ -707,8 +724,8 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G, if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) { unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1]; - DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> " - << TRI.getName(PReg) << "\n"); + LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> " + << TRI.getName(PReg) << "\n"); assert(PReg != 0 && "Invalid preg selected."); VRM.assignVirt2Phys(VReg, PReg); } else { @@ -784,7 +801,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { MF.getRegInfo().freezeReservedRegs(MF); - DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n"); + LLVM_DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n"); // Allocator main loop: // @@ -819,7 +836,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { unsigned Round = 0; while (!PBQPAllocComplete) { - DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n"); + LLVM_DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n"); PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI)); initializeGraph(G, VRM, *VRegSpiller); @@ -833,8 +850,8 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { ".pbqpgraph"; std::error_code EC; raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text); - DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" - << GraphFileName << "\"\n"); + LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" + << GraphFileName << "\"\n"); G.dump(OS); } #endif @@ -851,7 +868,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { VRegsToAlloc.clear(); EmptyIntervalVRegs.clear(); - DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n"); + LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n"); return true; } diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp index f49ea25bbf35..f1c442ac38ae 100644 --- a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp @@ -36,11 +36,8 @@ using namespace llvm; STATISTIC(NumCSROpt, "Number of functions optimized for callee saved registers"); -namespace llvm { -void initializeRegUsageInfoCollectorPass(PassRegistry &); -} - namespace { + class RegUsageInfoCollector : public MachineFunctionPass { public: RegUsageInfoCollector() : MachineFunctionPass(ID) { @@ -52,12 +49,21 @@ public: return "Register Usage Information Collector Pass"; } - void getAnalysisUsage(AnalysisUsage &AU) const override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<PhysicalRegisterUsageInfo>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } bool runOnMachineFunction(MachineFunction &MF) override; + // Call determineCalleeSaves and then also set the bits for subregs and + // fully saved superregs. 
+ static void computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF); + static char ID; }; + } // end of anonymous namespace char RegUsageInfoCollector::ID = 0; @@ -72,36 +78,32 @@ FunctionPass *llvm::createRegUsageInfoCollector() { return new RegUsageInfoCollector(); } -void RegUsageInfoCollector::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<PhysicalRegisterUsageInfo>(); - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); -} - bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo *MRI = &MF.getRegInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const TargetMachine &TM = MF.getTarget(); - DEBUG(dbgs() << " -------------------- " << getPassName() - << " -------------------- \n"); - DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n"); + LLVM_DEBUG(dbgs() << " -------------------- " << getPassName() + << " -------------------- \n"); + LLVM_DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n"); std::vector<uint32_t> RegMask; // Compute the size of the bit vector to represent all the registers. // The bit vector is broken into 32-bit chunks, thus takes the ceil of // the number of registers divided by 32 for the size. - unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; - RegMask.resize(RegMaskSize, 0xFFFFFFFF); + unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs()); + RegMask.resize(RegMaskSize, ~((uint32_t)0)); const Function &F = MF.getFunction(); - PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>(); + PhysicalRegisterUsageInfo &PRUI = getAnalysis<PhysicalRegisterUsageInfo>(); + PRUI.setTargetMachine(TM); - PRUI->setTargetMachine(&TM); + LLVM_DEBUG(dbgs() << "Clobbered Registers: "); - DEBUG(dbgs() << "Clobbered Registers: "); + BitVector SavedRegs; + computeCalleeSavedRegs(SavedRegs, MF); const BitVector &UsedPhysRegsMask = MRI->getUsedPhysRegsMask(); auto SetRegAsDefined = [&RegMask] (unsigned Reg) { @@ -110,42 +112,82 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { // Scan all the physical registers. When a register is defined in the current // function set it and all the aliasing registers as defined in the regmask. for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) { - // If a register is in the UsedPhysRegsMask set then mark it as defined. - // All it's aliases will also be in the set, so we can skip setting - // as defined all the aliases here. - if (UsedPhysRegsMask.test(PReg)) { - SetRegAsDefined(PReg); + // Don't count registers that are saved and restored. + if (SavedRegs.test(PReg)) continue; - } // If a register is defined by an instruction mark it as defined together - // with all it's aliases. + // with all it's unsaved aliases. if (!MRI->def_empty(PReg)) { for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI) - SetRegAsDefined(*AI); + if (!SavedRegs.test(*AI)) + SetRegAsDefined(*AI); + continue; } + // If a register is in the UsedPhysRegsMask set then mark it as defined. + // All clobbered aliases will also be in the set, so we can skip setting + // as defined all the aliases here. + if (UsedPhysRegsMask.test(PReg)) + SetRegAsDefined(PReg); } - if (!TargetFrameLowering::isSafeForNoCSROpt(F)) { - const uint32_t *CallPreservedMask = - TRI->getCallPreservedMask(MF, F.getCallingConv()); - if (CallPreservedMask) { - // Set callee saved register as preserved. 
- for (unsigned i = 0; i < RegMaskSize; ++i) - RegMask[i] = RegMask[i] | CallPreservedMask[i]; - } - } else { + if (TargetFrameLowering::isSafeForNoCSROpt(F)) { ++NumCSROpt; - DEBUG(dbgs() << MF.getName() - << " function optimized for not having CSR.\n"); + LLVM_DEBUG(dbgs() << MF.getName() + << " function optimized for not having CSR.\n"); } for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg)) - DEBUG(dbgs() << printReg(PReg, TRI) << " "); + LLVM_DEBUG(dbgs() << printReg(PReg, TRI) << " "); - DEBUG(dbgs() << " \n----------------------------------------\n"); + LLVM_DEBUG(dbgs() << " \n----------------------------------------\n"); - PRUI->storeUpdateRegUsageInfo(&F, std::move(RegMask)); + PRUI.storeUpdateRegUsageInfo(F, RegMask); return false; } + +void RegUsageInfoCollector:: +computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) { + const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + + // Target will return the set of registers that it saves/restores as needed. + SavedRegs.clear(); + TFI.determineCalleeSaves(MF, SavedRegs); + + // Insert subregs. + const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + if (SavedRegs.test(Reg)) + for (MCSubRegIterator SR(Reg, &TRI, false); SR.isValid(); ++SR) + SavedRegs.set(*SR); + } + + // Insert any register fully saved via subregisters. + for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) { + if (SavedRegs.test(PReg)) + continue; + + // Check if PReg is fully covered by its subregs. + bool CoveredBySubRegs = false; + for (const TargetRegisterClass *RC : TRI.regclasses()) + if (RC->CoveredBySubRegs && RC->contains(PReg)) { + CoveredBySubRegs = true; + break; + } + if (!CoveredBySubRegs) + continue; + + // Add PReg to SavedRegs if all subregs are saved. 
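The collector above builds the clobber regmask by starting from "everything preserved" and clearing bits, and it now skips registers the callee saves and restores itself. A standalone sketch of the regmask encoding this relies on, assuming the usual MachineOperand convention of one bit per physical register with a set bit meaning "preserved":

    #include <cstdint>
    #include <vector>

    static void markClobbered(std::vector<uint32_t> &RegMask, unsigned Reg) {
      RegMask[Reg / 32] &= ~(uint32_t(1) << (Reg % 32));   // clear = clobbered
    }

    static bool clobbersPhysReg(const std::vector<uint32_t> &RegMask,
                                unsigned Reg) {
      return !(RegMask[Reg / 32] & (uint32_t(1) << (Reg % 32)));
    }

    int main() {
      unsigned NumRegs = 100;                        // target-dependent count
      std::vector<uint32_t> RegMask((NumRegs + 31) / 32, ~uint32_t(0));
      markClobbered(RegMask, 42);
      return clobbersPhysReg(RegMask, 42) ? 0 : 1;   // exits 0: reg 42 clobbered
    }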
+ bool AllSubRegsSaved = true; + for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR) + if (!SavedRegs.test(*SR)) { + AllSubRegsSaved = false; + break; + } + if (AllSubRegsSaved) + SavedRegs.set(PReg); + } +} diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp index 5b12d00e126f..256de295821d 100644 --- a/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp +++ b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp @@ -34,10 +34,6 @@ #include <map> #include <string> -namespace llvm { -void initializeRegUsageInfoPropagationPassPass(PassRegistry &); -} - using namespace llvm; #define DEBUG_TYPE "ip-regalloc" @@ -45,54 +41,56 @@ using namespace llvm; #define RUIP_NAME "Register Usage Information Propagation" namespace { -class RegUsageInfoPropagationPass : public MachineFunctionPass { +class RegUsageInfoPropagation : public MachineFunctionPass { public: - RegUsageInfoPropagationPass() : MachineFunctionPass(ID) { + RegUsageInfoPropagation() : MachineFunctionPass(ID) { PassRegistry &Registry = *PassRegistry::getPassRegistry(); - initializeRegUsageInfoPropagationPassPass(Registry); + initializeRegUsageInfoPropagationPass(Registry); } StringRef getPassName() const override { return RUIP_NAME; } bool runOnMachineFunction(MachineFunction &MF) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<PhysicalRegisterUsageInfo>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } static char ID; private: - static void setRegMask(MachineInstr &MI, const uint32_t *RegMask) { + static void setRegMask(MachineInstr &MI, ArrayRef<uint32_t> RegMask) { + assert(RegMask.size() == + MachineOperand::getRegMaskSize(MI.getParent()->getParent() + ->getRegInfo().getTargetRegisterInfo() + ->getNumRegs()) + && "expected register mask size"); for (MachineOperand &MO : MI.operands()) { if (MO.isRegMask()) - MO.setRegMask(RegMask); + MO.setRegMask(RegMask.data()); } } }; + } // end of anonymous namespace -char RegUsageInfoPropagationPass::ID = 0; -INITIALIZE_PASS_BEGIN(RegUsageInfoPropagationPass, "reg-usage-propagation", +INITIALIZE_PASS_BEGIN(RegUsageInfoPropagation, "reg-usage-propagation", RUIP_NAME, false, false) INITIALIZE_PASS_DEPENDENCY(PhysicalRegisterUsageInfo) -INITIALIZE_PASS_END(RegUsageInfoPropagationPass, "reg-usage-propagation", +INITIALIZE_PASS_END(RegUsageInfoPropagation, "reg-usage-propagation", RUIP_NAME, false, false) -FunctionPass *llvm::createRegUsageInfoPropPass() { - return new RegUsageInfoPropagationPass(); -} - -void RegUsageInfoPropagationPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<PhysicalRegisterUsageInfo>(); - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); -} +char RegUsageInfoPropagation::ID = 0; // Assumes call instructions have a single reference to a function. 
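RegUsageInfoCollector and RegUsageInfoPropagation together form the interprocedural register allocation (IPRA) pipeline: the collector records each function's actual clobber set, and the propagation pass rewrites call sites so callers stop assuming worst-case clobbers. A fragment sketch of the rewrite step, where PreciseCalleeMask is an assumed name for the mask fetched from PhysicalRegisterUsageInfo:

    // Narrow a call's clobber information in place by replacing its regmask
    // operand with the callee's precise mask.
    for (MachineOperand &MO : CallMI.operands())
      if (MO.isRegMask())
        MO.setRegMask(PreciseCalleeMask.data());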
-static const Function *findCalledFunction(const Module &M, MachineInstr &MI) { - for (MachineOperand &MO : MI.operands()) { +static const Function *findCalledFunction(const Module &M, + const MachineInstr &MI) { + for (const MachineOperand &MO : MI.operands()) { if (MO.isGlobal()) - return dyn_cast<Function>(MO.getGlobal()); + return dyn_cast<const Function>(MO.getGlobal()); if (MO.isSymbol()) return M.getFunction(MO.getSymbolName()); @@ -101,13 +99,13 @@ static const Function *findCalledFunction(const Module &M, MachineInstr &MI) { return nullptr; } -bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) { - const Module *M = MF.getFunction().getParent(); +bool RegUsageInfoPropagation::runOnMachineFunction(MachineFunction &MF) { + const Module &M = *MF.getFunction().getParent(); PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>(); - DEBUG(dbgs() << " ++++++++++++++++++++ " << getPassName() - << " ++++++++++++++++++++ \n"); - DEBUG(dbgs() << "MachineFunction : " << MF.getName() << "\n"); + LLVM_DEBUG(dbgs() << " ++++++++++++++++++++ " << getPassName() + << " ++++++++++++++++++++ \n"); + LLVM_DEBUG(dbgs() << "MachineFunction : " << MF.getName() << "\n"); const MachineFrameInfo &MFI = MF.getFrameInfo(); if (!MFI.hasCalls() && !MFI.hasTailCall()) @@ -119,30 +117,37 @@ bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) { for (MachineInstr &MI : MBB) { if (!MI.isCall()) continue; - DEBUG(dbgs() - << "Call Instruction Before Register Usage Info Propagation : \n"); - DEBUG(dbgs() << MI << "\n"); - - auto UpdateRegMask = [&](const Function *F) { - const auto *RegMask = PRUI->getRegUsageInfo(F); - if (!RegMask) + LLVM_DEBUG( + dbgs() + << "Call Instruction Before Register Usage Info Propagation : \n"); + LLVM_DEBUG(dbgs() << MI << "\n"); + + auto UpdateRegMask = [&](const Function &F) { + const ArrayRef<uint32_t> RegMask = PRUI->getRegUsageInfo(F); + if (RegMask.empty()) return; - setRegMask(MI, &(*RegMask)[0]); + setRegMask(MI, RegMask); Changed = true; }; - if (const Function *F = findCalledFunction(*M, MI)) { - UpdateRegMask(F); + if (const Function *F = findCalledFunction(M, MI)) { + UpdateRegMask(*F); } else { - DEBUG(dbgs() << "Failed to find call target function\n"); + LLVM_DEBUG(dbgs() << "Failed to find call target function\n"); } - DEBUG(dbgs() << "Call Instruction After Register Usage Info Propagation : " - << MI << '\n'); + LLVM_DEBUG( + dbgs() << "Call Instruction After Register Usage Info Propagation : " + << MI << '\n'); } } - DEBUG(dbgs() << " +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" - "++++++ \n"); + LLVM_DEBUG( + dbgs() << " +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + "++++++ \n"); return Changed; } + +FunctionPass *llvm::createRegUsageInfoPropPass() { + return new RegUsageInfoPropagation(); +} diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp index b0eeb81f583e..add8faec97d4 100644 --- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -49,9 +49,6 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { if (MF->getSubtarget().getRegisterInfo() != TRI) { TRI = MF->getSubtarget().getRegisterInfo(); RegClass.reset(new RCInfo[TRI->getNumRegClasses()]); - unsigned NumPSets = TRI->getNumRegPressureSets(); - PSetLimits.reset(new unsigned[NumPSets]); - std::fill(&PSetLimits[0], &PSetLimits[NumPSets], 0); Update = true; } @@ -80,8 +77,12 @@ void 
RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { } // Invalidate cached information from previous function. - if (Update) + if (Update) { + unsigned NumPSets = TRI->getNumRegPressureSets(); + PSetLimits.reset(new unsigned[NumPSets]); + std::fill(&PSetLimits[0], &PSetLimits[NumPSets], 0); ++Tag; + } } /// compute - Compute the preferred allocation order for RC with reserved @@ -150,7 +151,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { RCI.MinCost = uint8_t(MinCost); RCI.LastCostChange = LastCostChange; - DEBUG({ + LLVM_DEBUG({ dbgs() << "AllocationOrder(" << TRI->getRegClassName(RC) << ") = ["; for (unsigned I = 0; I != RCI.NumRegs; ++I) dbgs() << ' ' << printReg(RCI.Order[I], TRI); diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp index 00a2e93c71ca..cad13a60efd2 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -115,11 +115,11 @@ namespace { /// checked for smaller live intervals. bool ShrinkMainRange; - /// \brief True if the coalescer should aggressively coalesce global copies + /// True if the coalescer should aggressively coalesce global copies /// in favor of keeping local copies. bool JoinGlobalCopies; - /// \brief True if the coalescer should aggressively coalesce fall-thru + /// True if the coalescer should aggressively coalesce fall-thru /// blocks exclusively containing copies. bool JoinSplitEdges; @@ -162,7 +162,7 @@ namespace { /// was successfully coalesced away. If it is not currently possible to /// coalesce this interval, but it may be possible if other things get /// coalesced, then it returns true by reference in 'Again'. - bool joinCopy(MachineInstr *TheCopy, bool &Again); + bool joinCopy(MachineInstr *CopyMI, bool &Again); /// Attempt to join these two intervals. On failure, this /// returns false. The output "SrcInt" will not have been modified, so we @@ -233,9 +233,11 @@ namespace { void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, MachineOperand &MO, unsigned SubRegIdx); - /// Handle copies of undef values. - /// Returns true if @p CopyMI was a copy of an undef value and eliminated. - bool eliminateUndefCopy(MachineInstr *CopyMI); + /// Handle copies of undef values. If the undef value is an incoming + /// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF. + /// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise, + /// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point). + MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI); /// Check whether or not we should apply the terminal rule on the /// destination (Dst) of \p Copy. @@ -568,7 +570,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // in IntB, we can merge them. if (ValS+1 != BS) return false; - DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI)); + LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI)); SlotIndex FillerStart = ValS->end, FillerEnd = BS->start; // We are about to delete CopyMI, so need to remove it as the 'instruction @@ -587,6 +589,13 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // Do the same for the subregister segments. for (LiveInterval::SubRange &S : IntB.subranges()) { + // Check for SubRange Segments of the form [1234r,1234d:0) which can be + // removed to prevent creating bogus SubRange Segments. 
+ LiveInterval::iterator SS = S.FindSegmentContaining(CopyIdx); + if (SS != S.end() && SlotIndex::isSameInstr(SS->start, SS->end)) { + S.removeSegment(*SS, true); + continue; + } VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx); S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo)); VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot()); @@ -594,7 +603,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, S.MergeValueNumberInto(SubBValNo, SubValSNo); } - DEBUG(dbgs() << " result = " << IntB << '\n'); + LLVM_DEBUG(dbgs() << " result = " << IntB << '\n'); // If the source instruction was killing the source register before the // merge, unset the isKill marker given the live range has been extended. @@ -603,11 +612,21 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, ValSEndInst->getOperand(UIdx).setIsKill(false); } - // Rewrite the copy. If the copy instruction was killing the destination - // register before the merge, find the last use and trim the live range. That - // will also add the isKill marker. + // Rewrite the copy. CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); - if (AS->end == CopyIdx) + // If the copy instruction was killing the destination register or any + // subrange before the merge trim the live range. + bool RecomputeLiveRange = AS->end == CopyIdx; + if (!RecomputeLiveRange) { + for (LiveInterval::SubRange &S : IntA.subranges()) { + LiveInterval::iterator SS = S.FindSegmentContaining(CopyUseIdx); + if (SS != S.end() && SS->end == CopyIdx) { + RecomputeLiveRange = true; + break; + } + } + } + if (RecomputeLiveRange) shrinkToUses(&IntA); ++numExtends; @@ -641,7 +660,7 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, return false; } -/// Copy segements with value number @p SrcValNo from liverange @p Src to live +/// Copy segments with value number @p SrcValNo from liverange @p Src to live /// range @Dst and use value number @p DstValNo there. static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src, const VNInfo *SrcValNo) { @@ -742,8 +761,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, return false; } - DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t' - << *DefMI); + LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t' + << *DefMI); // At this point we have decided that it is legal to do this // transformation. Start by commuting the instruction. 
@@ -812,7 +831,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); if (!DVNI) continue; - DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); + LLVM_DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); assert(DVNI->def == DefIdx); BValNo = IntB.MergeValueNumberInto(DVNI, BValNo); for (LiveInterval::SubRange &S : IntB.subranges()) { @@ -853,11 +872,11 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, BValNo->def = AValNo->def; addSegmentsWithValNo(IntB, BValNo, IntA, AValNo); - DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); + LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); LIS->removeVRegDefAt(IntA, AValNo->def); - DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); + LLVM_DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); ++numCommutes; return true; } @@ -989,13 +1008,24 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, if (CopyLeftBB && CopyLeftBB->succ_size() > 1) return false; - // Now ok to move copy. + // Now (almost sure it's) ok to move copy. if (CopyLeftBB) { - DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to " - << printMBBReference(*CopyLeftBB) << '\t' << CopyMI); + // Position in CopyLeftBB where we should insert new copy. + auto InsPos = CopyLeftBB->getFirstTerminator(); + + // Make sure that B isn't referenced in the terminators (if any) at the end + // of the predecessor since we're about to insert a new definition of B + // before them. + if (InsPos != CopyLeftBB->end()) { + SlotIndex InsPosIdx = LIS->getInstructionIndex(*InsPos).getRegSlot(true); + if (IntB.overlaps(InsPosIdx, LIS->getMBBEndIdx(CopyLeftBB))) + return false; + } + + LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to " + << printMBBReference(*CopyLeftBB) << '\t' << CopyMI); // Insert new copy to CopyLeftBB. - auto InsPos = CopyLeftBB->getFirstTerminator(); MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(), TII->get(TargetOpcode::COPY), IntB.reg) .addReg(IntA.reg); @@ -1010,8 +1040,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, // the deleted list. ErasedInstrs.erase(NewCopyMI); } else { - DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from " - << printMBBReference(MBB) << '\t' << CopyMI); + LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from " + << printMBBReference(MBB) << '\t' << CopyMI); } // Remove CopyMI. @@ -1039,6 +1069,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, BValNo->markUnused(); LIS->extendToIndices(SR, EndPoints); } + // If any dead defs were extended, truncate them. + shrinkToUses(&IntB); // Finally, update the live-range of IntA. shrinkToUses(&IntA); @@ -1174,7 +1206,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, I != E; ++I) { MachineOperand &MO = CopyMI->getOperand(I); if (MO.isReg()) { - assert(MO.isImplicit() && "No explicit operands after implict operands."); + assert(MO.isImplicit() && "No explicit operands after implicit operands."); // Discard VReg implicit defs. if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) ImplicitOps.push_back(MO); @@ -1220,6 +1252,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // Update machine operands and add flags. 
updateRegDefsUses(DstReg, DstReg, DstIdx); NewMI.getOperand(0).setSubReg(NewIdx); + // updateRegDefUses can add an "undef" flag to the definition, since + // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make + // sure that "undef" is not set. + if (NewIdx == 0) + NewMI.getOperand(0).setIsUndef(false); // Add dead subregister definitions if we are defining the whole register // but only part of it is live. // This could happen if the rematerialization instruction is rematerializing @@ -1266,8 +1303,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, bool UpdatedSubRanges = false; for (LiveInterval::SubRange &SR : DstInt.subranges()) { if ((SR.LaneMask & DstMask).none()) { - DEBUG(dbgs() << "Removing undefined SubRange " - << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n"); + LLVM_DEBUG(dbgs() + << "Removing undefined SubRange " + << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n"); // VNI is in ValNo - remove any segments in this SubRange that have this ValNo if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) { SR.removeValNo(RmValNo); @@ -1299,7 +1337,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // %1 = somedef ; %1 GR8 // dead ECX = remat ; implicit-def CL // = somedef %1 ; %1 GR8 - // %1 will see the inteferences with CL but not with CH since + // %1 will see the interferences with CL but not with CH since // no live-ranges would have been created for ECX. // Fix that! SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); @@ -1324,7 +1362,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); } - DEBUG(dbgs() << "Remat: " << NewMI); + LLVM_DEBUG(dbgs() << "Remat: " << NewMI); ++NumReMats; // The source interval can become smaller because we removed a use. @@ -1339,7 +1377,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // Move the debug value directly after the def of the rematerialized // value in DstReg. MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI); - DEBUG(dbgs() << "\t\tupdated: " << *UseMI); + LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI); } } eliminateDeadDefs(); @@ -1348,9 +1386,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, return true; } -bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { - // ProcessImpicitDefs may leave some copies of <undef> values, it only removes - // local variables. When we have a copy like: +MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { + // ProcessImplicitDefs may leave some copies of <undef> values, it only + // removes local variables. When we have a copy like: // // %1 = COPY undef %2 // @@ -1372,16 +1410,34 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { if ((SR.LaneMask & SrcMask).none()) continue; if (SR.liveAt(Idx)) - return false; + return nullptr; } } else if (SrcLI.liveAt(Idx)) - return false; + return nullptr; - DEBUG(dbgs() << "\tEliminating copy of <undef> value\n"); - - // Remove any DstReg segments starting at the instruction. + // If the undef copy defines a live-out value (i.e. an input to a PHI def), + // then replace it with an IMPLICIT_DEF. 
LiveInterval &DstLI = LIS->getInterval(DstReg); SlotIndex RegIndex = Idx.getRegSlot(); + LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex); + assert(Seg != nullptr && "No segment for defining instruction"); + if (VNInfo *V = DstLI.getVNInfoAt(Seg->end)) { + if (V->isPHIDef()) { + CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); + for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) { + MachineOperand &MO = CopyMI->getOperand(i-1); + if (MO.isReg() && MO.isUse()) + CopyMI->RemoveOperand(i-1); + } + LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an " + "implicit def\n"); + return CopyMI; + } + } + + // Remove any DstReg segments starting at the instruction. + LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n"); + // Remove value or merge with previous one in case of a subregister def. if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) { VNInfo *VNI = DstLI.getVNInfoAt(RegIndex); @@ -1424,7 +1480,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { if (isLive) continue; MO.setIsUndef(true); - DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI); + LLVM_DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI); } // A def of a subregister may be a use of the other subregisters, so @@ -1437,7 +1493,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { MO.setIsUndef(true); LIS->shrinkToUses(&DstLI); - return true; + return CopyMI; } void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, @@ -1539,12 +1595,12 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, MO.substVirtReg(DstReg, SubIdx, *TRI); } - DEBUG({ - dbgs() << "\t\tupdated: "; - if (!UseMI->isDebugValue()) - dbgs() << LIS->getInstructionIndex(*UseMI) << "\t"; - dbgs() << *UseMI; - }); + LLVM_DEBUG({ + dbgs() << "\t\tupdated: "; + if (!UseMI->isDebugValue()) + dbgs() << LIS->getInstructionIndex(*UseMI) << "\t"; + dbgs() << *UseMI; + }); } } @@ -1553,7 +1609,7 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { // reserved register. This doesn't increase register pressure, so it is // always beneficial. if (!MRI->isReserved(CP.getDstReg())) { - DEBUG(dbgs() << "\tCan only merge into reserved registers.\n"); + LLVM_DEBUG(dbgs() << "\tCan only merge into reserved registers.\n"); return false; } @@ -1561,17 +1617,18 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { if (JoinVInt.containsOneValue()) return true; - DEBUG(dbgs() << "\tCannot join complex intervals into reserved register.\n"); + LLVM_DEBUG( + dbgs() << "\tCannot join complex intervals into reserved register.\n"); return false; } bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { Again = false; - DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI); + LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI); CoalescerPair CP(*TRI); if (!CP.setRegisters(CopyMI)) { - DEBUG(dbgs() << "\tNot coalescable.\n"); + LLVM_DEBUG(dbgs() << "\tNot coalescable.\n"); return false; } @@ -1586,7 +1643,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { } if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx, CP.getNewRC(), *LIS)) { - DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n"); + LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n"); return false; } } @@ -1595,16 +1652,21 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // sometimes dead copies slip through, and we can't generate invalid live // ranges. 
if (!CP.isPhys() && CopyMI->allDefsAreDead()) { - DEBUG(dbgs() << "\tCopy is dead.\n"); + LLVM_DEBUG(dbgs() << "\tCopy is dead.\n"); DeadDefs.push_back(CopyMI); eliminateDeadDefs(); return true; } // Eliminate undefs. - if (!CP.isPhys() && eliminateUndefCopy(CopyMI)) { - deleteInstr(CopyMI); - return false; // Not coalescable. + if (!CP.isPhys()) { + // If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce. + if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) { + if (UndefMI->isImplicitDef()) + return false; + deleteInstr(CopyMI); + return false; // Not coalescable. + } } // Coalesced copies are normally removed immediately, but transformations @@ -1612,7 +1674,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // When that happens, just join the values and remove the copy. if (CP.getSrcReg() == CP.getDstReg()) { LiveInterval &LI = LIS->getInterval(CP.getSrcReg()); - DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); + LLVM_DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI); LiveQueryResult LRQ = LI.Query(CopyIdx); if (VNInfo *DefVNI = LRQ.valueDefined()) { @@ -1629,7 +1691,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { S.MergeValueNumberInto(SDefVNI, SReadVNI); } } - DEBUG(dbgs() << "\tMerged values: " << LI << '\n'); + LLVM_DEBUG(dbgs() << "\tMerged values: " << LI << '\n'); } deleteInstr(CopyMI); return true; @@ -1637,9 +1699,9 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // Enforce policies. if (CP.isPhys()) { - DEBUG(dbgs() << "\tConsidering merging " << printReg(CP.getSrcReg(), TRI) - << " with " << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) - << '\n'); + LLVM_DEBUG(dbgs() << "\tConsidering merging " + << printReg(CP.getSrcReg(), TRI) << " with " + << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n'); if (!canJoinPhys(CP)) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. @@ -1656,7 +1718,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { LIS->getInterval(CP.getDstReg()).size()) CP.flip(); - DEBUG({ + LLVM_DEBUG({ dbgs() << "\tConsidering merging to " << TRI->getRegClassName(CP.getNewRC()) << " with "; if (CP.getDstIdx() && CP.getSrcIdx()) @@ -1692,7 +1754,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (adjustCopiesBackFrom(CP, CopyMI) || removeCopyByCommutingDef(CP, CopyMI)) { deleteInstr(CopyMI); - DEBUG(dbgs() << "\tTrivial!\n"); + LLVM_DEBUG(dbgs() << "\tTrivial!\n"); return true; } } @@ -1704,7 +1766,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { return true; // Otherwise, we are unable to join the intervals. - DEBUG(dbgs() << "\tInterference!\n"); + LLVM_DEBUG(dbgs() << "\tInterference!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1738,8 +1800,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { for (LiveInterval::SubRange &S : LI.subranges()) { if ((S.LaneMask & ShrinkMask).none()) continue; - DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) - << ")\n"); + LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) + << ")\n"); LIS->shrinkToUses(S, LI.reg); } LI.removeEmptySubRanges(); @@ -1756,7 +1818,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // Update regalloc hint. 
TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); - DEBUG({ + LLVM_DEBUG({ dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx()) << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n'; dbgs() << "\tResult = "; @@ -1777,7 +1839,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { assert(CP.isPhys() && "Must be a physreg copy"); assert(MRI->isReserved(DstReg) && "Not a reserved register"); LiveInterval &RHS = LIS->getInterval(SrcReg); - DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); + LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); assert(RHS.containsOneValue() && "Invalid join with reserved register"); @@ -1796,7 +1858,8 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { return false; } if (RHS.overlaps(LIS->getRegUnit(*UI))) { - DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI) << '\n'); + LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI) + << '\n'); return false; } } @@ -1805,7 +1868,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { BitVector RegMaskUsable; if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) && !RegMaskUsable.test(DstReg)) { - DEBUG(dbgs() << "\t\tRegMask interference\n"); + LLVM_DEBUG(dbgs() << "\t\tRegMask interference\n"); return false; } } @@ -1835,12 +1898,12 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // %y = def // ... if (!MRI->hasOneNonDBGUse(SrcReg)) { - DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); + LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); return false; } if (!LIS->intervalIsInOneMBB(RHS)) { - DEBUG(dbgs() << "\t\tComplex control flow!\n"); + LLVM_DEBUG(dbgs() << "\t\tComplex control flow!\n"); return false; } @@ -1858,7 +1921,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) { MachineInstr *MI = LIS->getInstructionFromIndex(SI); if (MI->readsRegister(DstReg, TRI)) { - DEBUG(dbgs() << "\t\tInterference (read): " << *MI); + LLVM_DEBUG(dbgs() << "\t\tInterference (read): " << *MI); return false; } } @@ -1866,8 +1929,8 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // We're going to remove the copy which defines a physical reserved // register, so remove its valno, etc. - DEBUG(dbgs() << "\t\tRemoving phys reg def of " << printReg(DstReg, TRI) - << " at " << CopyRegIdx << "\n"); + LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of " + << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n"); LIS->removePhysRegDefAt(DstReg, CopyRegIdx); // Create a new dead def at the new def location. @@ -2057,6 +2120,13 @@ class JoinVals { /// True once Pruned above has been computed. bool PrunedComputed = false; + /// True if this value is determined to be identical to OtherVNI + /// (in valuesIdentical). This is used with CR_Erase where the erased + /// copy is redundant, i.e. the source value is already the same as + /// the destination. In such cases the subranges need to be updated + /// properly. See comment at pruneSubRegValues for more info. + bool Identical = false; + Val() = default; bool isAnalyzed() const { return WriteLanes.any(); } @@ -2073,7 +2143,7 @@ class JoinVals { /// Find the ultimate value that VNI was copied from. 
std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const; - bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other) const; + bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const; /// Analyze ValNo in this live range, and set all fields of Vals[ValNo]. /// Return a conflict resolution when possible, but leave the hard cases as @@ -2191,17 +2261,17 @@ LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain( const VNInfo *VNI) const { - unsigned Reg = this->Reg; + unsigned TrackReg = Reg; while (!VNI->isPHIDef()) { SlotIndex Def = VNI->def; MachineInstr *MI = Indexes->getInstructionFromIndex(Def); assert(MI && "No defining instruction"); if (!MI->isFullCopy()) - return std::make_pair(VNI, Reg); + return std::make_pair(VNI, TrackReg); unsigned SrcReg = MI->getOperand(1).getReg(); if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) - return std::make_pair(VNI, Reg); + return std::make_pair(VNI, TrackReg); const LiveInterval &LI = LIS->getInterval(SrcReg); const VNInfo *ValueIn; @@ -2210,7 +2280,8 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain( LiveQueryResult LRQ = LI.Query(Def); ValueIn = LRQ.valueIn(); } else { - // Query subranges. Pick the first matching one. + // Query subranges. Ensure that all matching ones take us to the same def + // (allowing some of them to be undef). ValueIn = nullptr; for (const LiveInterval::SubRange &S : LI.subranges()) { // Transform lanemask to a mask in the joined live interval. @@ -2218,16 +2289,27 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain( if ((SMask & LaneMask).none()) continue; LiveQueryResult LRQ = S.Query(Def); - ValueIn = LRQ.valueIn(); - break; + if (!ValueIn) { + ValueIn = LRQ.valueIn(); + continue; + } + if (LRQ.valueIn() && ValueIn != LRQ.valueIn()) + return std::make_pair(VNI, TrackReg); } } - if (ValueIn == nullptr) - break; + if (ValueIn == nullptr) { + // Reaching an undefined value is legitimate, for example: + // + // 1 undef %0.sub1 = ... ;; %0.sub0 == undef + // 2 %1 = COPY %0 ;; %1 is defined here. + // 3 %0 = COPY %1 ;; Now %0.sub0 has a definition, + // ;; but it's equivalent to "undef". + return std::make_pair(nullptr, SrcReg); + } VNI = ValueIn; - Reg = SrcReg; + TrackReg = SrcReg; } - return std::make_pair(VNI, Reg); + return std::make_pair(VNI, TrackReg); } bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1, @@ -2235,12 +2317,17 @@ bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1, const VNInfo *Orig0; unsigned Reg0; std::tie(Orig0, Reg0) = followCopyChain(Value0); - if (Orig0 == Value1) + if (Orig0 == Value1 && Reg0 == Other.Reg) return true; const VNInfo *Orig1; unsigned Reg1; std::tie(Orig1, Reg1) = Other.followCopyChain(Value1); + // If both values are undefined, and the source registers are the same + // register, the values are identical. Filter out cases where only one + // value is defined. + if (Orig0 == nullptr || Orig1 == nullptr) + return Orig0 == Orig1 && Reg0 == Reg1; // The values are equal if they are defined at the same place and use the // same register. Note that we cannot compare VNInfos directly as some of @@ -2375,9 +2462,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // to erase the IMPLICIT_DEF instruction. 
if (OtherV.ErasableImplicitDef && DefMI && DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) { - DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def - << " extends into " << printMBBReference(*DefMI->getParent()) - << ", keeping it.\n"); + LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def + << " extends into " + << printMBBReference(*DefMI->getParent()) + << ", keeping it.\n"); OtherV.ErasableImplicitDef = false; } @@ -2415,9 +2503,11 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // %other = COPY %ext // %this = COPY %ext <-- Erase this copy // - if (DefMI->isFullCopy() && !CP.isPartial() - && valuesIdentical(VNI, V.OtherVNI, Other)) + if (DefMI->isFullCopy() && !CP.isPartial() && + valuesIdentical(VNI, V.OtherVNI, Other)) { + V.Identical = true; return CR_Erase; + } // If the lanes written by this instruction were all undef in OtherVNI, it is // still safe to join the live ranges. This can't be done with a simple value @@ -2487,11 +2577,11 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) { assert(V.OtherVNI && "OtherVNI not assigned, can't merge."); assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion"); Assignments[ValNo] = Other.Assignments[V.OtherVNI->id]; - DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@' - << LR.getValNumInfo(ValNo)->def << " into " - << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@' - << V.OtherVNI->def << " --> @" - << NewVNInfo[Assignments[ValNo]]->def << '\n'); + LLVM_DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@' + << LR.getValNumInfo(ValNo)->def << " into " + << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@' + << V.OtherVNI->def << " --> @" + << NewVNInfo[Assignments[ValNo]]->def << '\n'); break; case CR_Replace: case CR_Unresolved: { @@ -2517,8 +2607,8 @@ bool JoinVals::mapValues(JoinVals &Other) { for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { computeAssignment(i, Other); if (Vals[i].Resolution == CR_Impossible) { - DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i - << '@' << LR.getValNumInfo(i)->def << '\n'); + LLVM_DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i + << '@' << LR.getValNumInfo(i)->def << '\n'); return false; } } @@ -2540,13 +2630,13 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, // lanes escape the block. SlotIndex End = OtherI->end; if (End >= MBBEnd) { - DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':' - << OtherI->valno->id << '@' << OtherI->start << '\n'); + LLVM_DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':' + << OtherI->valno->id << '@' << OtherI->start << '\n'); return false; } - DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':' - << OtherI->valno->id << '@' << OtherI->start - << " to " << End << '\n'); + LLVM_DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':' + << OtherI->valno->id << '@' << OtherI->start << " to " + << End << '\n'); // A dead def is not a problem. 
if (End.isDead()) break; @@ -2567,7 +2657,7 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx, LaneBitmask Lanes) const { - if (MI.isDebugValue()) + if (MI.isDebugInstr()) return false; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg) @@ -2587,8 +2677,8 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { assert(V.Resolution != CR_Impossible && "Unresolvable conflict"); if (V.Resolution != CR_Unresolved) continue; - DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i - << '@' << LR.getValNumInfo(i)->def << '\n'); + LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@' + << LR.getValNumInfo(i)->def << '\n'); if (SubRangeJoin) return false; @@ -2625,7 +2715,7 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { while (true) { assert(MI != MBB->end() && "Bad LastMI"); if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) { - DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); + LLVM_DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); return false; } // LastMI is the last instruction to use the current value. @@ -2698,8 +2788,8 @@ void JoinVals::pruneValues(JoinVals &Other, if (!EraseImpDef) EndPoints.push_back(Def); } - DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def - << ": " << Other.LR << '\n'); + LLVM_DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def + << ": " << Other.LR << '\n'); break; } case CR_Erase: @@ -2710,8 +2800,8 @@ void JoinVals::pruneValues(JoinVals &Other, // computeAssignment(), the value that was originally copied could have // been replaced. LIS->pruneValue(LR, Def, &EndPoints); - DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at " - << Def << ": " << LR << '\n'); + LLVM_DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at " + << Def << ": " << LR << '\n'); } break; case CR_Unresolved: @@ -2721,21 +2811,65 @@ void JoinVals::pruneValues(JoinVals &Other, } } +/// Consider the following situation when coalescing the copy between +/// %31 and %45 at 800. (The vertical lines represent live range segments.) +/// +/// Main range Subrange 0004 (sub2) +/// %31 %45 %31 %45 +/// 544 %45 = COPY %28 + + +/// | v1 | v1 +/// 560B bb.1: + + +/// 624 = %45.sub2 | v2 | v2 +/// 800 %31 = COPY %45 + + + + +/// | v0 | v0 +/// 816 %31.sub1 = ... + | +/// 880 %30 = COPY %31 | v1 + +/// 928 %45 = COPY %30 | + + +/// | | v0 | v0 <--+ +/// 992B ; backedge -> bb.1 | + + | +/// 1040 = %31.sub0 + | +/// This value must remain +/// live-out! +/// +/// Assuming that %31 is coalesced into %45, the copy at 928 becomes +/// redundant, since it copies the value from %45 back into it. The +/// conflict resolution for the main range determines that %45.v0 is +/// to be erased, which is ok since %31.v1 is identical to it. +/// The problem happens with the subrange for sub2: it has to be live +/// on exit from the block, but since 928 was actually a point of +/// definition of %45.sub2, %45.sub2 was not live immediately prior +/// to that definition. As a result, when 928 was erased, the value v0 +/// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an +/// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2, +/// providing an incorrect value to the use at 624. +/// +/// Since the main-range values %31.v1 and %45.v0 were proved to be +/// identical, the corresponding values in subranges must also be the +/// same. 
A redundant copy is removed because it's not needed, and not +/// because it copied an undefined value, so any liveness that originated +/// from that copy cannot disappear. When pruning a value that started +/// at the removed copy, the corresponding identical value must be +/// extended to replace it. void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) { // Look for values being erased. bool DidPrune = false; for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { + Val &V = Vals[i]; // We should trigger in all cases in which eraseInstrs() does something. // match what eraseInstrs() is doing, print a message so - if (Vals[i].Resolution != CR_Erase && - (Vals[i].Resolution != CR_Keep || !Vals[i].ErasableImplicitDef || - !Vals[i].Pruned)) + if (V.Resolution != CR_Erase && + (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)) continue; // Check subranges at the point where the copy will be removed. SlotIndex Def = LR.getValNumInfo(i)->def; + SlotIndex OtherDef; + if (V.Identical) + OtherDef = V.OtherVNI->def; + // Print message so mismatches with eraseInstrs() can be diagnosed. - DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def << '\n'); + LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def + << '\n'); for (LiveInterval::SubRange &S : LI.subranges()) { LiveQueryResult Q = S.Query(Def); @@ -2743,19 +2877,28 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) { // copied and we must remove that subrange value as well. VNInfo *ValueOut = Q.valueOutOrDead(); if (ValueOut != nullptr && Q.valueIn() == nullptr) { - DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask) - << " at " << Def << "\n"); - LIS->pruneValue(S, Def, nullptr); + LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask) + << " at " << Def << "\n"); + SmallVector<SlotIndex,8> EndPoints; + LIS->pruneValue(S, Def, &EndPoints); DidPrune = true; // Mark value number as unused. ValueOut->markUnused(); + + if (V.Identical && S.Query(OtherDef).valueOut()) { + // If V is identical to V.OtherVNI (and S was live at OtherDef), + // then we can't simply prune V from S. V needs to be replaced + // with V.OtherVNI. + LIS->extendToIndices(S, EndPoints); + } continue; } // If a subrange ends at the copy, then a value was copied but only // partially used later. Shrink the subregister range appropriately. 
if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) { - DEBUG(dbgs() << "\t\tDead uses at sublane " << PrintLaneMask(S.LaneMask) - << " at " << Def << "\n"); + LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane " + << PrintLaneMask(S.LaneMask) << " at " << Def + << "\n"); ShrinkMask |= S.LaneMask; } } @@ -2867,7 +3010,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, std::prev(S)->end = NewEnd; } } - DEBUG({ + LLVM_DEBUG({ dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n'; if (LI != nullptr) dbgs() << "\t\t LHS = " << *LI << '\n'; @@ -2885,7 +3028,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, ShrinkRegs.push_back(Reg); } ErasedInstrs.insert(MI); - DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI); + LLVM_DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI); LIS->RemoveMachineInstrFromMaps(*MI); MI->eraseFromParent(); break; @@ -2940,13 +3083,14 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo); - DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n"); + LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask) + << ' ' << LRange << "\n"); if (EndPoints.empty()) return; // Recompute the parts of the live range we had to remove because of // CR_Replace conflicts. - DEBUG({ + LLVM_DEBUG({ dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: "; for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) { dbgs() << EndPoints[i]; @@ -2985,9 +3129,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(), NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness); - DEBUG(dbgs() << "\t\tRHS = " << RHS - << "\n\t\tLHS = " << LHS - << '\n'); + LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n'); // First compute NewVNInfo and the simple value mappings. // Detect impossible conflicts early. @@ -3018,8 +3160,8 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { R.LaneMask = Mask; } } - DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) - << ' ' << LHS << '\n'); + LLVM_DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS + << '\n'); // Determine lanemasks of RHS in the coalesced register and merge subranges. unsigned SrcIdx = CP.getSrcIdx(); @@ -3034,7 +3176,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { mergeSubRangeInto(LHS, R, Mask, CP); } } - DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); + LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); // Pruning implicit defs from subranges may result in the main range // having stale segments. @@ -3072,7 +3214,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { if (!EndPoints.empty()) { // Recompute the parts of the live range we had to remove because of // CR_Replace conflicts. - DEBUG({ + LLVM_DEBUG({ dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: "; for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) { dbgs() << EndPoints[i]; @@ -3220,7 +3362,8 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { continue; // Check that OtherReg interfere with DstReg. 
if (LIS->getInterval(OtherReg).overlaps(DstLI)) { - DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg) << '\n'); + LLVM_DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg) + << '\n'); return true; } } @@ -3229,7 +3372,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { void RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { - DEBUG(dbgs() << MBB->getName() << ":\n"); + LLVM_DEBUG(dbgs() << MBB->getName() << ":\n"); // Collect all copy-like instructions in MBB. Don't start coalescing anything // yet, it might invalidate the iterator. @@ -3294,7 +3437,7 @@ void RegisterCoalescer::coalesceLocals() { } void RegisterCoalescer::joinAllIntervals() { - DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); + LLVM_DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around."); std::vector<MBBPriorityInfo> MBBs; @@ -3350,8 +3493,8 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // splitting optimization. JoinSplitEdges = EnableJoinSplits; - DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" - << "********** Function: " << MF->getName() << '\n'); + LLVM_DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" + << "********** Function: " << MF->getName() << '\n'); if (VerifyCoalescing) MF->verify(this, "Before register coalescing"); @@ -3368,14 +3511,15 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { array_pod_sort(InflateRegs.begin(), InflateRegs.end()); InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()), InflateRegs.end()); - DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n"); + LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() + << " regs.\n"); for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) { unsigned Reg = InflateRegs[i]; if (MRI->reg_nodbg_empty(Reg)) continue; if (MRI->recomputeRegClass(Reg)) { - DEBUG(dbgs() << printReg(Reg) << " inflated to " - << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n'); + LLVM_DEBUG(dbgs() << printReg(Reg) << " inflated to " + << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n'); ++NumInflated; LiveInterval &LI = LIS->getInterval(Reg); @@ -3398,7 +3542,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { } } - DEBUG(dump()); + LLVM_DEBUG(dump()); if (VerifyCoalescing) MF->verify(this, "After register coalescing"); return true; diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp index 9ac810c7c723..51414de518fd 100644 --- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Compiler.h" @@ -587,7 +588,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, for (auto I = Defs.begin(); I != Defs.end(); ) { LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, I->RegUnit, Pos.getDeadSlot()); - // If the the def is all that is live after the instruction, then in case + // If the def is all that is live after the instruction, then in case // of a subregister def we need a read-undef flag. 
unsigned RegUnit = I->RegUnit; if (TargetRegisterInfo::isVirtualRegister(RegUnit) && @@ -635,7 +636,7 @@ void PressureDiffs::init(unsigned N) { } Max = Size; free(PDiffArray); - PDiffArray = reinterpret_cast<PressureDiff*>(calloc(N, sizeof(PressureDiff))); + PDiffArray = static_cast<PressureDiff*>(safe_calloc(N, sizeof(PressureDiff))); } void PressureDiffs::addInstruction(unsigned Idx, @@ -747,7 +748,7 @@ void RegPressureTracker::bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs) { /// instruction independent of liveness. void RegPressureTracker::recede(const RegisterOperands &RegOpers, SmallVectorImpl<RegisterMaskPair> *LiveUses) { - assert(!CurrPos->isDebugValue()); + assert(!CurrPos->isDebugInstr()); // Boost pressure for all dead defs together. bumpDeadDefs(RegOpers.DeadDefs); @@ -1018,7 +1019,7 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec, /// This is intended for speculative queries. It leaves pressure inconsistent /// with the current position, so must be restored by the caller. void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { - assert(!MI->isDebugValue() && "Expect a nondebug instruction."); + assert(!MI->isDebugInstr() && "Expect a nondebug instruction."); SlotIndex SlotIdx; if (RequireIntervals) @@ -1259,7 +1260,7 @@ LaneBitmask RegPressureTracker::getLiveThroughAt(unsigned RegUnit, /// This is intended for speculative queries. It leaves pressure inconsistent /// with the current position, so must be restored by the caller. void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { - assert(!MI->isDebugValue() && "Expect a nondebug instruction."); + assert(!MI->isDebugInstr() && "Expect a nondebug instruction."); SlotIndex SlotIdx; if (RequireIntervals) diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp index 97967124add6..a878c34f9aa4 100644 --- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -111,7 +111,7 @@ void RegScavenger::determineKillsAndDefs() { assert(Tracking && "Must be tracking to determine kills and defs"); MachineInstr &MI = *MBBI; - assert(!MI.isDebugValue() && "Debug values have no kills or defs"); + assert(!MI.isDebugInstr() && "Debug values have no kills or defs"); // Find out which registers are early clobbered, killed, defined, and marked // def-dead in this instruction. @@ -158,12 +158,12 @@ void RegScavenger::unprocess() { assert(Tracking && "Cannot unprocess because we're not tracking"); MachineInstr &MI = *MBBI; - if (!MI.isDebugValue()) { + if (!MI.isDebugInstr()) { determineKillsAndDefs(); // Commit the changes. 
- setUsed(KillRegUnits); setUnused(DefRegUnits); + setUsed(KillRegUnits); } if (MBBI == MBB->begin()) { @@ -195,7 +195,7 @@ void RegScavenger::forward() { I->Restore = nullptr; } - if (MI.isDebugValue()) + if (MI.isDebugInstr()) return; determineKillsAndDefs(); @@ -288,8 +288,8 @@ bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const { unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { for (unsigned Reg : *RC) { if (!isRegUsed(Reg)) { - DEBUG(dbgs() << "Scavenger found unused reg: " << printReg(Reg, TRI) - << "\n"); + LLVM_DEBUG(dbgs() << "Scavenger found unused reg: " << printReg(Reg, TRI) + << "\n"); return Reg; } } @@ -318,7 +318,7 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, bool inVirtLiveRange = false; for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) { - if (MI->isDebugValue()) { + if (MI->isDebugInstr()) { ++InstrLimit; // Don't count debug instructions continue; } @@ -561,15 +561,15 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // If we found an unused register there is no reason to spill it. if (!isRegUsed(SReg)) { - DEBUG(dbgs() << "Scavenged register: " << printReg(SReg, TRI) << "\n"); + LLVM_DEBUG(dbgs() << "Scavenged register: " << printReg(SReg, TRI) << "\n"); return SReg; } ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI); Scavenged.Restore = &*std::prev(UseMI); - DEBUG(dbgs() << "Scavenged register (with spill): " << printReg(SReg, TRI) - << "\n"); + LLVM_DEBUG(dbgs() << "Scavenged register (with spill): " + << printReg(SReg, TRI) << "\n"); return SReg; } @@ -594,14 +594,15 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator ReloadAfter = RestoreAfter ? std::next(MBBI) : MBBI; MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter); - DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n'); + LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n'); ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore); Scavenged.Restore = &*std::prev(SpillBefore); LiveUnits.removeReg(Reg); - DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI) - << " until " << *SpillBefore); + LLVM_DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI) + << " until " << *SpillBefore); } else { - DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI) << '\n'); + LLVM_DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI) + << '\n'); } return Reg; } @@ -757,8 +758,8 @@ void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) { bool Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB); if (Again) { - DEBUG(dbgs() << "Warning: Required two scavenging passes for block " - << MBB.getName() << '\n'); + LLVM_DEBUG(dbgs() << "Warning: Required two scavenging passes for block " + << MBB.getName() << '\n'); Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB); // The target required a 2nd run (because it created new vregs while // spilling). Refuse to do another pass to keep compiletime in check. 
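The RegisterScavenging hunks above also switch several isDebugValue() checks to isDebugInstr(), which matches every debug pseudo instruction rather than only DBG_VALUE. A small sketch of the pattern, assuming it runs inside a MachineFunction pass (the helper name is illustrative):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

static unsigned countNonDebugInstrs(const llvm::MachineBasicBlock &MBB) {
  unsigned N = 0;
  for (const llvm::MachineInstr &MI : MBB)
    if (!MI.isDebugInstr()) // debug pseudos must not affect scan limits
      ++N;
  return N;
}

This mirrors the early exits in unprocess() and forward() above, which now skip all debug pseudos instead of only DBG_VALUE so that scavenger state is never updated from them.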
diff --git a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp index 4e42deb406e1..6a31118cc562 100644 --- a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp @@ -31,8 +31,6 @@ using namespace llvm; -#define DEBUG_TYPE "ip-regalloc" - static cl::opt<bool> DumpRegUsage( "print-regusage", cl::init(false), cl::Hidden, cl::desc("print register usage details collected for analysis.")); @@ -42,7 +40,9 @@ INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info", char PhysicalRegisterUsageInfo::ID = 0; -void PhysicalRegisterUsageInfo::anchor() {} +void PhysicalRegisterUsageInfo::setTargetMachine(const TargetMachine &TM) { + this->TM = &TM; +} bool PhysicalRegisterUsageInfo::doInitialization(Module &M) { RegMasks.grow(M.size()); @@ -58,22 +58,19 @@ bool PhysicalRegisterUsageInfo::doFinalization(Module &M) { } void PhysicalRegisterUsageInfo::storeUpdateRegUsageInfo( - const Function *FP, std::vector<uint32_t> RegMask) { - assert(FP != nullptr && "Function * can't be nullptr."); - RegMasks[FP] = std::move(RegMask); + const Function &FP, ArrayRef<uint32_t> RegMask) { + RegMasks[&FP] = RegMask; } -const std::vector<uint32_t> * -PhysicalRegisterUsageInfo::getRegUsageInfo(const Function *FP) { - auto It = RegMasks.find(FP); +ArrayRef<uint32_t> +PhysicalRegisterUsageInfo::getRegUsageInfo(const Function &FP) { + auto It = RegMasks.find(&FP); if (It != RegMasks.end()) - return &(It->second); - return nullptr; + return makeArrayRef<uint32_t>(It->second); + return ArrayRef<uint32_t>(); } void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const { - const TargetRegisterInfo *TRI; - using FuncPtrRegMaskPair = std::pair<const Function *, std::vector<uint32_t>>; SmallVector<const FuncPtrRegMaskPair *, 64> FPRMPairVector; @@ -83,7 +80,7 @@ void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const { FPRMPairVector.push_back(&RegMask); // sort the vector to print analysis in alphabatic order of function name. - std::sort( + llvm::sort( FPRMPairVector.begin(), FPRMPairVector.end(), [](const FuncPtrRegMaskPair *A, const FuncPtrRegMaskPair *B) -> bool { return A->first->getName() < B->first->getName(); @@ -92,8 +89,9 @@ void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const { for (const FuncPtrRegMaskPair *FPRMPair : FPRMPairVector) { OS << FPRMPair->first->getName() << " " << "Clobbered Registers: "; - TRI = TM->getSubtarget<TargetSubtargetInfo>(*(FPRMPair->first)) - .getRegisterInfo(); + const TargetRegisterInfo *TRI + = TM->getSubtarget<TargetSubtargetInfo>(*(FPRMPair->first)) + .getRegisterInfo(); for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) { if (MachineOperand::clobbersPhysReg(&(FPRMPair->second[0]), PReg)) diff --git a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp index 1e1f36a35ecc..156d1c81c238 100644 --- a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp @@ -77,20 +77,20 @@ private: /// Split unrelated subregister components and rename them to new vregs. bool renameComponents(LiveInterval &LI) const; - /// \brief Build a vector of SubRange infos and a union find set of + /// Build a vector of SubRange infos and a union find set of /// equivalence classes. /// Returns true if more than 1 equivalence class was found. 
bool findComponents(IntEqClasses &Classes, SmallVectorImpl<SubRangeInfo> &SubRangeInfos, LiveInterval &LI) const; - /// \brief Distribute the LiveInterval segments into the new LiveIntervals + /// Distribute the LiveInterval segments into the new LiveIntervals /// belonging to their class. void distribute(const IntEqClasses &Classes, const SmallVectorImpl<SubRangeInfo> &SubRangeInfos, const SmallVectorImpl<LiveInterval*> &Intervals) const; - /// \brief Constructs main liverange and add missing undef+dead flags. + /// Constructs main liverange and add missing undef+dead flags. void computeMainRangesFixFlags(const IntEqClasses &Classes, const SmallVectorImpl<SubRangeInfo> &SubRangeInfos, const SmallVectorImpl<LiveInterval*> &Intervals) const; @@ -134,17 +134,17 @@ bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const { const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); SmallVector<LiveInterval*, 4> Intervals; Intervals.push_back(&LI); - DEBUG(dbgs() << printReg(Reg) << ": Found " << Classes.getNumClasses() - << " equivalence classes.\n"); - DEBUG(dbgs() << printReg(Reg) << ": Splitting into newly created:"); + LLVM_DEBUG(dbgs() << printReg(Reg) << ": Found " << Classes.getNumClasses() + << " equivalence classes.\n"); + LLVM_DEBUG(dbgs() << printReg(Reg) << ": Splitting into newly created:"); for (unsigned I = 1, NumClasses = Classes.getNumClasses(); I < NumClasses; ++I) { unsigned NewVReg = MRI->createVirtualRegister(RegClass); LiveInterval &NewLI = LIS->createEmptyInterval(NewVReg); Intervals.push_back(&NewLI); - DEBUG(dbgs() << ' ' << printReg(NewVReg)); + LLVM_DEBUG(dbgs() << ' ' << printReg(NewVReg)); } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); rewriteOperands(Classes, SubRangeInfos, Intervals); distribute(Classes, SubRangeInfos, Intervals); @@ -219,7 +219,8 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes, if (!MO.isDef() && !MO.readsReg()) continue; - SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent()); + auto *MI = MO.getParent(); + SlotIndex Pos = LIS->getInstructionIndex(*MI); Pos = MO.isDef() ? Pos.getRegSlot(MO.isEarlyClobber()) : Pos.getBaseIndex(); unsigned SubRegIdx = MO.getSubReg(); @@ -245,11 +246,14 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes, MO.setReg(VReg); if (MO.isTied() && Reg != VReg) { - /// Undef use operands are not tracked in the equivalence class but need - /// to be update if they are tied. - MO.getParent()->substituteRegister(Reg, VReg, 0, TRI); - - // substituteRegister breaks the iterator, so restart. + /// Undef use operands are not tracked in the equivalence class, + /// but need to be updated if they are tied; take care to only + /// update the tied operand. + unsigned OperandNo = MI->getOperandNo(&MO); + unsigned TiedIdx = MI->findTiedOperandIdx(OperandNo); + MI->getOperand(TiedIdx).setReg(VReg); + + // above substitution breaks the iterator, so restart. 
I = MRI->reg_nodbg_begin(Reg); } } @@ -376,8 +380,8 @@ bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) { if (!MRI->subRegLivenessEnabled()) return false; - DEBUG(dbgs() << "Renaming independent subregister live ranges in " - << MF.getName() << '\n'); + LLVM_DEBUG(dbgs() << "Renaming independent subregister live ranges in " + << MF.getName() << '\n'); LIS = &getAnalysis<LiveIntervals>(); TII = MF.getSubtarget().getInstrInfo(); diff --git a/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp index f1885aa74285..a02302e6ff99 100644 --- a/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp +++ b/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp @@ -13,9 +13,12 @@ /// happen is that the MachineFunction has the FailedISel property. //===----------------------------------------------------------------------===// +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/Debug.h" @@ -42,12 +45,23 @@ namespace { StringRef getPassName() const override { return "ResetMachineFunction"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<StackProtector>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override { + // No matter what happened, whether we successfully selected the function + // or not, nothing is going to use the vreg types after us. Make sure they + // disappear. + auto ClearVRegTypesOnReturn = + make_scope_exit([&MF]() { MF.getRegInfo().clearVirtRegTypes(); }); + if (MF.getProperties().hasProperty( MachineFunctionProperties::Property::FailedISel)) { if (AbortOnFailedISel) report_fatal_error("Instruction selection failed"); - DEBUG(dbgs() << "Reseting: " << MF.getName() << '\n'); + LLVM_DEBUG(dbgs() << "Resetting: " << MF.getName() << '\n'); ++NumFunctionsReset; MF.reset(); if (EmitFallbackDiag) { @@ -65,7 +79,7 @@ namespace { char ResetMachineFunction::ID = 0; INITIALIZE_PASS(ResetMachineFunction, DEBUG_TYPE, - "reset machine function if ISel failed", false, false) + "Reset machine function if ISel failed", false, false) MachineFunctionPass * llvm::createResetMachineFunctionPass(bool EmitFallbackDiag = false, diff --git a/contrib/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm/lib/CodeGen/SafeStack.cpp index 51233be521be..cbbbf7c385aa 100644 --- a/contrib/llvm/lib/CodeGen/SafeStack.cpp +++ b/contrib/llvm/lib/CodeGen/SafeStack.cpp @@ -24,10 +24,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -61,7 +63,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/Cloning.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -88,6 +90,13 
@@ STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads"); } // namespace llvm +/// Use __safestack_pointer_address even if the platform has a faster way of +/// access safe stack pointer. +static cl::opt<bool> + SafeStackUsePointerAddress("safestack-use-pointer-address", + cl::init(false), cl::Hidden); + + namespace { /// Rewrite an SCEV expression for a memory access address to an expression that @@ -134,14 +143,14 @@ class SafeStack { /// might expect to appear on the stack on most common targets. enum { StackAlignment = 16 }; - /// \brief Return the value of the stack canary. + /// Return the value of the stack canary. Value *getStackGuard(IRBuilder<> &IRB, Function &F); - /// \brief Load stack guard from the frame and check if it has changed. + /// Load stack guard from the frame and check if it has changed. void checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI, AllocaInst *StackGuardSlot, Value *StackGuard); - /// \brief Find all static allocas, dynamic allocas, return instructions and + /// Find all static allocas, dynamic allocas, return instructions and /// stack restore points (exception unwind blocks and setjmp calls) in the /// given function and append them to the respective vectors. void findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas, @@ -150,11 +159,11 @@ class SafeStack { SmallVectorImpl<ReturnInst *> &Returns, SmallVectorImpl<Instruction *> &StackRestorePoints); - /// \brief Calculate the allocation size of a given alloca. Returns 0 if the + /// Calculate the allocation size of a given alloca. Returns 0 if the /// size can not be statically determined. uint64_t getStaticAllocaAllocationSize(const AllocaInst* AI); - /// \brief Allocate space for all static allocas in \p StaticAllocas, + /// Allocate space for all static allocas in \p StaticAllocas, /// replace allocas with pointers into the unsafe stack and generate code to /// restore the stack pointer before all return instructions in \p Returns. /// @@ -167,7 +176,7 @@ class SafeStack { Instruction *BasePointer, AllocaInst *StackGuardSlot); - /// \brief Generate code to restore the stack after all stack restore points + /// Generate code to restore the stack after all stack restore points /// in \p StackRestorePoints. /// /// \returns A local variable in which to maintain the dynamic top of the @@ -177,7 +186,7 @@ class SafeStack { ArrayRef<Instruction *> StackRestorePoints, Value *StaticTop, bool NeedDynamicTop); - /// \brief Replace all allocas in \p DynamicAllocas with code to allocate + /// Replace all allocas in \p DynamicAllocas with code to allocate /// space dynamically on the unsafe stack and store the dynamic unsafe stack /// top to \p DynamicTop if non-null. void moveDynamicAllocasToUnsafeStack(Function &F, Value *UnsafeStackPtr, @@ -191,6 +200,9 @@ class SafeStack { bool IsAccessSafe(Value *Addr, uint64_t Size, const Value *AllocaPtr, uint64_t AllocaSize); + bool ShouldInlinePointerAddress(CallSite &CS); + void TryInlinePointerAddress(); + public: SafeStack(Function &F, const TargetLoweringBase &TL, const DataLayout &DL, ScalarEvolution &SE) @@ -230,16 +242,17 @@ bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize, ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AllocaSize)); bool Safe = AllocaRange.contains(AccessRange); - DEBUG(dbgs() << "[SafeStack] " - << (isa<AllocaInst>(AllocaPtr) ? 
"Alloca " : "ByValArgument ") - << *AllocaPtr << "\n" - << " Access " << *Addr << "\n" - << " SCEV " << *Expr - << " U: " << SE.getUnsignedRange(Expr) - << ", S: " << SE.getSignedRange(Expr) << "\n" - << " Range " << AccessRange << "\n" - << " AllocaRange " << AllocaRange << "\n" - << " " << (Safe ? "safe" : "unsafe") << "\n"); + LLVM_DEBUG( + dbgs() << "[SafeStack] " + << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ") + << *AllocaPtr << "\n" + << " Access " << *Addr << "\n" + << " SCEV " << *Expr + << " U: " << SE.getUnsignedRange(Expr) + << ", S: " << SE.getSignedRange(Expr) << "\n" + << " Range " << AccessRange << "\n" + << " AllocaRange " << AllocaRange << "\n" + << " " << (Safe ? "safe" : "unsafe") << "\n"); return Safe; } @@ -286,8 +299,9 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { case Instruction::Store: if (V == I->getOperand(0)) { // Stored the pointer - conservatively assume it may be unsafe. - DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr - << "\n store of address: " << *I << "\n"); + LLVM_DEBUG(dbgs() + << "[SafeStack] Unsafe alloca: " << *AllocaPtr + << "\n store of address: " << *I << "\n"); return false; } @@ -312,9 +326,9 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { if (!IsMemIntrinsicSafe(MI, UI, AllocaPtr, AllocaSize)) { - DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr - << "\n unsafe memintrinsic: " << *I - << "\n"); + LLVM_DEBUG(dbgs() + << "[SafeStack] Unsafe alloca: " << *AllocaPtr + << "\n unsafe memintrinsic: " << *I << "\n"); return false; } continue; @@ -332,8 +346,8 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { if (A->get() == V) if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) || CS.doesNotAccessMemory()))) { - DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr - << "\n unsafe call: " << *I << "\n"); + LLVM_DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr + << "\n unsafe call: " << *I << "\n"); return false; } continue; @@ -545,6 +559,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( for (Argument *Arg : ByValArguments) { unsigned Offset = SSL.getObjectOffset(Arg); + unsigned Align = SSL.getObjectAlignment(Arg); Type *Ty = Arg->getType()->getPointerElementType(); uint64_t Size = DL.getTypeStoreSize(Ty); @@ -561,7 +576,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( DIExpression::NoDeref, -Offset, DIExpression::NoDeref); Arg->replaceAllUsesWith(NewArg); IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode()); - IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment()); + IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlignment(), Size); } // Allocate space for every unsafe static AllocaInst on the unsafe stack. 
@@ -695,6 +710,35 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( } } +bool SafeStack::ShouldInlinePointerAddress(CallSite &CS) { + Function *Callee = CS.getCalledFunction(); + if (CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee)) + return true; + if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) || + CS.isNoInline()) + return false; + return true; +} + +void SafeStack::TryInlinePointerAddress() { + if (!isa<CallInst>(UnsafeStackPtr)) + return; + + if(F.hasFnAttribute(Attribute::OptimizeNone)) + return; + + CallSite CS(UnsafeStackPtr); + Function *Callee = CS.getCalledFunction(); + if (!Callee || Callee->isDeclaration()) + return; + + if (!ShouldInlinePointerAddress(CS)) + return; + + InlineFunctionInfo IFI; + InlineFunction(CS, IFI); +} + bool SafeStack::run() { assert(F.hasFnAttribute(Attribute::SafeStack) && "Can't run SafeStack on a function without the attribute"); @@ -731,7 +775,13 @@ bool SafeStack::run() { ++NumUnsafeStackRestorePointsFunctions; IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt()); - UnsafeStackPtr = TL.getSafeStackPointerLocation(IRB); + if (SafeStackUsePointerAddress) { + Value *Fn = F.getParent()->getOrInsertFunction( + "__safestack_pointer_address", StackPtrTy->getPointerTo(0)); + UnsafeStackPtr = IRB.CreateCall(Fn); + } else { + UnsafeStackPtr = TL.getSafeStackPointerLocation(IRB); + } // Load the current stack pointer (we'll also use it as a base pointer). // FIXME: use a dedicated register for it ? @@ -779,7 +829,9 @@ bool SafeStack::run() { IRB.CreateStore(BasePointer, UnsafeStackPtr); } - DEBUG(dbgs() << "[SafeStack] safestack applied\n"); + TryInlinePointerAddress(); + + LLVM_DEBUG(dbgs() << "[SafeStack] safestack applied\n"); return true; } @@ -800,17 +852,17 @@ public: } bool runOnFunction(Function &F) override { - DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n"); + LLVM_DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n"); if (!F.hasFnAttribute(Attribute::SafeStack)) { - DEBUG(dbgs() << "[SafeStack] safestack is not requested" - " for this function\n"); + LLVM_DEBUG(dbgs() << "[SafeStack] safestack is not requested" + " for this function\n"); return false; } if (F.isDeclaration()) { - DEBUG(dbgs() << "[SafeStack] function definition" - " is not available\n"); + LLVM_DEBUG(dbgs() << "[SafeStack] function definition" + " is not available\n"); return false; } diff --git a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp index 072e6e090e1e..329458778a98 100644 --- a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp +++ b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Instruction.h" @@ -101,10 +102,10 @@ void StackColoring::collectMarkers() { // For each basic block, compute // * the list of markers in the instruction order // * the sets of allocas whose lifetime starts or ends in this BB - DEBUG(dbgs() << "Instructions:\n"); + LLVM_DEBUG(dbgs() << "Instructions:\n"); unsigned InstNo = 0; for (BasicBlock *BB : depth_first(&F)) { - DEBUG(dbgs() << " " << InstNo << ": BB " << BB->getName() << "\n"); + LLVM_DEBUG(dbgs() << " " << InstNo << ": BB " << BB->getName() << "\n"); unsigned BBStart = InstNo++; BlockLifetimeInfo &BlockInfo = BlockLiveness[BB]; @@ -121,9 +122,9 @@ void 
StackColoring::collectMarkers() { } auto ProcessMarker = [&](Instruction *I, const Marker &M) { - DEBUG(dbgs() << " " << InstNo << ": " - << (M.IsStart ? "start " : "end ") << M.AllocaNo << ", " - << *I << "\n"); + LLVM_DEBUG(dbgs() << " " << InstNo << ": " + << (M.IsStart ? "start " : "end ") << M.AllocaNo + << ", " << *I << "\n"); BBMarkers[BB].push_back({InstNo, M}); @@ -280,7 +281,7 @@ LLVM_DUMP_METHOD void StackColoring::dumpLiveRanges() { #endif void StackColoring::run() { - DEBUG(dumpAllocas()); + LLVM_DEBUG(dumpAllocas()); for (unsigned I = 0; I < NumAllocas; ++I) AllocaNumbering[Allocas[I]] = I; @@ -303,7 +304,7 @@ void StackColoring::run() { LiveRanges[I] = getFullLiveRange(); calculateLocalLiveness(); - DEBUG(dumpBlockLiveness()); + LLVM_DEBUG(dumpBlockLiveness()); calculateLiveIntervals(); - DEBUG(dumpLiveRanges()); + LLVM_DEBUG(dumpLiveRanges()); } diff --git a/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp index b1759359e46f..07b6a5d1883b 100644 --- a/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp +++ b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp @@ -42,6 +42,7 @@ LLVM_DUMP_METHOD void StackLayout::print(raw_ostream &OS) { void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment, const StackColoring::LiveRange &Range) { StackObjects.push_back({V, Size, Alignment, Range}); + ObjectAlignments[V] = Alignment; MaxAlignment = std::max(MaxAlignment, Alignment); } @@ -62,30 +63,30 @@ void StackLayout::layoutObject(StackObject &Obj) { return; } - DEBUG(dbgs() << "Layout: size " << Obj.Size << ", align " << Obj.Alignment - << ", range " << Obj.Range << "\n"); + LLVM_DEBUG(dbgs() << "Layout: size " << Obj.Size << ", align " + << Obj.Alignment << ", range " << Obj.Range << "\n"); assert(Obj.Alignment <= MaxAlignment); unsigned Start = AdjustStackOffset(0, Obj.Size, Obj.Alignment); unsigned End = Start + Obj.Size; - DEBUG(dbgs() << " First candidate: " << Start << " .. " << End << "\n"); + LLVM_DEBUG(dbgs() << " First candidate: " << Start << " .. " << End << "\n"); for (const StackRegion &R : Regions) { - DEBUG(dbgs() << " Examining region: " << R.Start << " .. " << R.End - << ", range " << R.Range << "\n"); + LLVM_DEBUG(dbgs() << " Examining region: " << R.Start << " .. " << R.End + << ", range " << R.Range << "\n"); assert(End >= R.Start); if (Start >= R.End) { - DEBUG(dbgs() << " Does not intersect, skip.\n"); + LLVM_DEBUG(dbgs() << " Does not intersect, skip.\n"); continue; } if (Obj.Range.Overlaps(R.Range)) { // Find the next appropriate location. Start = AdjustStackOffset(R.End, Obj.Size, Obj.Alignment); End = Start + Obj.Size; - DEBUG(dbgs() << " Overlaps. Next candidate: " << Start << " .. " << End - << "\n"); + LLVM_DEBUG(dbgs() << " Overlaps. Next candidate: " << Start << " .. " + << End << "\n"); continue; } if (End <= R.End) { - DEBUG(dbgs() << " Reusing region(s).\n"); + LLVM_DEBUG(dbgs() << " Reusing region(s).\n"); break; } } @@ -94,13 +95,13 @@ void StackLayout::layoutObject(StackObject &Obj) { if (End > LastRegionEnd) { // Insert a new region at the end. Maybe two. if (Start > LastRegionEnd) { - DEBUG(dbgs() << " Creating gap region: " << LastRegionEnd << " .. " - << Start << "\n"); + LLVM_DEBUG(dbgs() << " Creating gap region: " << LastRegionEnd << " .. " + << Start << "\n"); Regions.emplace_back(LastRegionEnd, Start, StackColoring::LiveRange()); LastRegionEnd = Start; } - DEBUG(dbgs() << " Creating new region: " << LastRegionEnd << " .. 
" << End - << ", range " << Obj.Range << "\n"); + LLVM_DEBUG(dbgs() << " Creating new region: " << LastRegionEnd << " .. " + << End << ", range " << Obj.Range << "\n"); Regions.emplace_back(LastRegionEnd, End, Obj.Range); LastRegionEnd = End; } @@ -149,5 +150,5 @@ void StackLayout::computeLayout() { for (auto &Obj : StackObjects) layoutObject(Obj); - DEBUG(print(dbgs())); + LLVM_DEBUG(print(dbgs())); } diff --git a/contrib/llvm/lib/CodeGen/SafeStackLayout.h b/contrib/llvm/lib/CodeGen/SafeStackLayout.h index 7c1292f251f7..ac531d800f6e 100644 --- a/contrib/llvm/lib/CodeGen/SafeStackLayout.h +++ b/contrib/llvm/lib/CodeGen/SafeStackLayout.h @@ -47,6 +47,7 @@ class StackLayout { SmallVector<StackObject, 8> StackObjects; DenseMap<const Value *, unsigned> ObjectOffsets; + DenseMap<const Value *, unsigned> ObjectAlignments; void layoutObject(StackObject &Obj); @@ -64,6 +65,9 @@ public: /// Returns the offset to the object start in the stack frame. unsigned getObjectOffset(const Value *V) { return ObjectOffsets[V]; } + /// Returns the alignment of the object + unsigned getObjectAlignment(const Value *V) { return ObjectAlignments[V]; } + /// Returns the size of the entire frame. unsigned getFrameSize() { return Regions.empty() ? 0 : Regions.back().End; } diff --git a/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp index cef413f9d410..9387722bfebd 100644 --- a/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp +++ b/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp @@ -586,9 +586,6 @@ static void scalarizeMaskedScatter(CallInst *CI) { } bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) { - if (skipFunction(F)) - return false; - bool EverMadeChange = false; TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp index 0635e8f41ee7..46064012d9d8 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 9249fa84b38b..d1c5ddabb975 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -38,6 +38,7 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" @@ -118,7 +119,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, DbgValues.clear(); const TargetSubtargetInfo &ST = mf.getSubtarget(); - SchedModel.init(ST.getSchedModel(), &ST, TII); + SchedModel.init(&ST); } /// If this machine instr has memory reference information and it can be @@ -266,7 +267,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { } } -/// \brief Adds register dependencies (data, anti, and output) from this SUnit +/// Adds register dependencies (data, anti, and output) from this SUnit /// to following instructions in the same scheduling region that depend the /// physical register referenced at OperIdx. 
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { @@ -317,13 +318,14 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { } else { addPhysRegDataDeps(SU, OperIdx); - // clear this register's use list - if (Uses.contains(Reg)) - Uses.eraseAll(Reg); - - if (!MO.isDead()) { - Defs.eraseAll(Reg); - } else if (SU->isCall) { + // Clear previous uses and defs of this register and its subergisters. + for (MCSubRegIterator SubReg(Reg, TRI, true); SubReg.isValid(); ++SubReg) { + if (Uses.contains(*SubReg)) + Uses.eraseAll(*SubReg); + if (!MO.isDead()) + Defs.eraseAll(*SubReg); + } + if (MO.isDead() && SU->isCall) { // Calls will not be reordered because of chain dependencies (see // below). Since call operands are dead, calls may continue to be added // to the DefList making dependence checking quadratic in the size of @@ -468,7 +470,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU)); } -/// \brief Adds a register data dependency if the instruction that defines the +/// Adds a register data dependency if the instruction that defines the /// virtual register used at OperIdx is mapped to an SUnit. Add a register /// antidependency from this SUnit to instructions that occur later in the same /// scheduling region if they write the virtual register. @@ -514,7 +516,7 @@ void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb, } } -/// \brief Creates an SUnit for each real instruction, numbered in top-down +/// Creates an SUnit for each real instruction, numbered in top-down /// topological order. The instruction order A < B, implies that no edge exists /// from B to A. /// @@ -532,7 +534,7 @@ void ScheduleDAGInstrs::initSUnits() { SUnits.reserve(NumRegionInstrs); for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) { - if (MI.isDebugValue()) + if (MI.isDebugInstr()) continue; SUnit *SU = newSUnit(&MI); @@ -763,6 +765,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, DbgMI = &MI; continue; } + if (MI.isDebugLabel()) + continue; + SUnit *SU = MISUnitMap[&MI]; assert(SU && "No SUnit mapped to this MI"); @@ -845,8 +850,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, BarrierChain->addPredBarrier(SU); BarrierChain = SU; - DEBUG(dbgs() << "Global memory object and new barrier chain: SU(" - << BarrierChain->NodeNum << ").\n";); + LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU(" + << BarrierChain->NodeNum << ").\n";); // Add dependencies against everything below it and clear maps. addBarrierChain(Stores); @@ -934,11 +939,12 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Reduce maps if they grow huge. 
if (Stores.size() + Loads.size() >= HugeRegion) { - DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";); + LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";); reduceHugeMemNodeMaps(Stores, Loads, getReductionSize()); } if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) { - DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";); + LLVM_DEBUG( + dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";); reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize()); } } @@ -978,10 +984,8 @@ void ScheduleDAGInstrs::Value2SUsMap::dump() { void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores, Value2SUsMap &loads, unsigned N) { - DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; - stores.dump(); - dbgs() << "Loading SUnits:\n"; - loads.dump()); + LLVM_DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; stores.dump(); + dbgs() << "Loading SUnits:\n"; loads.dump()); // Insert all SU's NodeNums into a vector and sort it. std::vector<unsigned> NodeNums; @@ -992,7 +996,7 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores, for (auto &I : loads) for (auto *SU : I.second) NodeNums.push_back(SU->NodeNum); - std::sort(NodeNums.begin(), NodeNums.end()); + llvm::sort(NodeNums.begin(), NodeNums.end()); // The N last elements in NodeNums will be removed, and the SU with // the lowest NodeNum of them will become the new BarrierChain to @@ -1007,12 +1011,12 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores, if (newBarrierChain->NodeNum < BarrierChain->NodeNum) { BarrierChain->addPredBarrier(newBarrierChain); BarrierChain = newBarrierChain; - DEBUG(dbgs() << "Inserting new barrier chain: SU(" - << BarrierChain->NodeNum << ").\n";); + LLVM_DEBUG(dbgs() << "Inserting new barrier chain: SU(" + << BarrierChain->NodeNum << ").\n";); } else - DEBUG(dbgs() << "Keeping old barrier chain: SU(" - << BarrierChain->NodeNum << ").\n";); + LLVM_DEBUG(dbgs() << "Keeping old barrier chain: SU(" + << BarrierChain->NodeNum << ").\n";); } else BarrierChain = newBarrierChain; @@ -1020,10 +1024,8 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores, insertBarrierChain(stores); insertBarrierChain(loads); - DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; - stores.dump(); - dbgs() << "Loading SUnits:\n"; - loads.dump()); + LLVM_DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; stores.dump(); + dbgs() << "Loading SUnits:\n"; loads.dump()); } static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, @@ -1044,14 +1046,14 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, } void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { - DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n'); + LLVM_DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n'); LiveRegs.init(*TRI); LiveRegs.addLiveOuts(MBB); // Examine block from end to start... for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { - if (MI.isDebugValue()) + if (MI.isDebugInstr()) continue; // Update liveness. 
Registers that are defed but not used in this @@ -1087,7 +1089,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { while (I->isBundledWithSucc()) ++I; do { - if (!I->isDebugValue()) + if (!I->isDebugInstr()) toggleKills(MRI, LiveRegs, *I, true); --I; } while(I != First); @@ -1212,7 +1214,7 @@ public: RootSet[SU->NodeNum] = RData; } - /// \brief Called once for each tree edge after calling visitPostOrderNode on + /// Called once for each tree edge after calling visitPostOrderNode on /// the predecessor. Increment the parent node's instruction count and /// preemptively join this subtree to its parent's if it is small enough. void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) { @@ -1245,11 +1247,11 @@ public: } R.SubtreeConnections.resize(SubtreeClasses.getNumClasses()); R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses()); - DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n"); + LLVM_DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n"); for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) { R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx]; - DEBUG(dbgs() << " SU(" << Idx << ") in tree " - << R.DFSNodeData[Idx].SubtreeID << '\n'); + LLVM_DEBUG(dbgs() << " SU(" << Idx << ") in tree " + << R.DFSNodeData[Idx].SubtreeID << '\n'); } for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) { unsigned PredTree = SubtreeClasses[P.first->NodeNum]; @@ -1404,8 +1406,8 @@ void SchedDFSResult::scheduleTree(unsigned SubtreeID) { for (const Connection &C : SubtreeConnections[SubtreeID]) { SubtreeConnectLevels[C.TreeID] = std::max(SubtreeConnectLevels[C.TreeID], C.Level); - DEBUG(dbgs() << " Tree: " << C.TreeID - << " @" << SubtreeConnectLevels[C.TreeID] << '\n'); + LLVM_DEBUG(dbgs() << " Tree: " << C.TreeID << " @" + << SubtreeConnectLevels[C.TreeID] << '\n'); } } diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp index 37c4a470bd0a..ff2085aae865 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -61,7 +61,7 @@ namespace llvm { } - std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph); + std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *Graph); static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *Graph) { return "shape=Mrecord"; diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp index b789e2d9c52c..b8bfe69a76e1 100644 --- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -16,6 +16,7 @@ #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/Compiler.h" @@ -68,12 +69,12 @@ ScoreboardHazardRecognizer::ScoreboardHazardRecognizer( // If MaxLookAhead is not set above, then we are not enabled. if (!isEnabled()) - DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n"); + LLVM_DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n"); else { // A nonempty itinerary must have a SchedModel. 
IssueWidth = ItinData->SchedModel.IssueWidth; - DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = " - << ScoreboardDepth << '\n'); + LLVM_DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = " + << ScoreboardDepth << '\n'); } } @@ -155,9 +156,9 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { } if (!freeUnits) { - DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", "); - DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); - DEBUG(DAG->dumpNode(SU)); + LLVM_DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", "); + LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); + LLVM_DEBUG(DAG->dumpNode(SU)); return Hazard; } } @@ -223,8 +224,8 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { cycle += IS->getNextCycles(); } - DEBUG(ReservedScoreboard.dump()); - DEBUG(RequiredScoreboard.dump()); + LLVM_DEBUG(ReservedScoreboard.dump()); + LLVM_DEBUG(RequiredScoreboard.dump()); } void ScoreboardHazardRecognizer::AdvanceCycle() { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 03cb2e310c7e..7a99687757f8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -36,7 +36,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" @@ -60,6 +59,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -122,7 +122,7 @@ namespace { bool LegalTypes = false; bool ForCodeSize; - /// \brief Worklist of all of the nodes that need to be simplified. + /// Worklist of all of the nodes that need to be simplified. /// /// This must behave as a stack -- new nodes to process are pushed onto the /// back and when processing we pop off of the back. @@ -131,14 +131,14 @@ namespace { /// due to nodes being deleted from the underlying DAG. SmallVector<SDNode *, 64> Worklist; - /// \brief Mapping from an SDNode to its position on the worklist. + /// Mapping from an SDNode to its position on the worklist. /// /// This is used to find and remove nodes from the worklist (by nulling /// them) when they are deleted from the underlying DAG. It relies on /// stable indices of nodes within the worklist. DenseMap<SDNode *, unsigned> WorklistMap; - /// \brief Set of nodes which have been combined (at least once). + /// Set of nodes which have been combined (at least once). /// /// This is used to allow us to reliably add any operands of a DAG node /// which have not yet been combined to the worklist. @@ -232,14 +232,25 @@ namespace { return SimplifyDemandedBits(Op, Demanded); } + /// Check the specified vector node value to see if it can be simplified or + /// if things it uses can be simplified as it only uses some of the + /// elements. If so, return true. 
+ bool SimplifyDemandedVectorElts(SDValue Op) { + unsigned NumElts = Op.getValueType().getVectorNumElements(); + APInt Demanded = APInt::getAllOnesValue(NumElts); + return SimplifyDemandedVectorElts(Op, Demanded); + } + bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); + bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded, + bool AssumeSingleUse = false); bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); SDValue SplitIndexingFromLoad(LoadSDNode *LD); bool SliceUpLoad(SDNode *N); - /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed + /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed /// load. /// /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. @@ -258,10 +269,6 @@ namespace { SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); - void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc, - SDValue ExtLoad, const SDLoc &DL, - ISD::NodeType ExtType); - /// Call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the /// target-specific DAG combines. @@ -292,7 +299,9 @@ namespace { SDValue visitMUL(SDNode *N); SDValue useDivRem(SDNode *N); SDValue visitSDIV(SDNode *N); + SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitUDIV(SDNode *N); + SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitREM(SDNode *N); SDValue visitMULHU(SDNode *N); SDValue visitMULHS(SDNode *N); @@ -302,9 +311,9 @@ namespace { SDValue visitUMULO(SDNode *N); SDValue visitIMINMAX(SDNode *N); SDValue visitAND(SDNode *N); - SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); + SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitOR(SDNode *N); - SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference); + SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitXOR(SDNode *N); SDValue SimplifyVBinOp(SDNode *N); SDValue visitSHL(SDNode *N); @@ -323,7 +332,6 @@ namespace { SDValue visitVSELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); SDValue visitSETCC(SDNode *N); - SDValue visitSETCCE(SDNode *N); SDValue visitSETCCCARRY(SDNode *N); SDValue visitSIGN_EXTEND(SDNode *N); SDValue visitZERO_EXTEND(SDNode *N); @@ -385,8 +393,8 @@ namespace { SDValue visitFMULForFMADistributiveCombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); - SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS, - SDValue RHS); + SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, + SDValue N1); SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); @@ -403,8 +411,11 @@ namespace { SDValue N2, SDValue N3, ISD::CondCode CC); SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, const SDLoc &DL); + SDValue unfoldMaskedMerge(SDNode *N); + SDValue unfoldExtremeBitClearingToShifts(SDNode *N); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, - const SDLoc &DL, bool foldBooleans = true); + const SDLoc &DL, bool foldBooleans); + SDValue rebuildSetCC(SDValue N); bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC) const; @@ -414,20 +425,21 @@ namespace { unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); SDValue CombineExtLoad(SDNode *N); + SDValue CombineZExtLogicopShiftLoad(SDNode *N); SDValue combineRepeatedFPDivisors(SDNode *N); SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); 
SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); - SDValue BuildLogBase2(SDValue Op, const SDLoc &DL); + SDValue BuildLogBase2(SDValue V, const SDLoc &DL); SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags); SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); - SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, + SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, SDNodeFlags Flags, bool Reciprocal); - SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, + SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, SDNodeFlags Flags, bool Reciprocal); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); @@ -442,13 +454,14 @@ namespace { SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue splitMergedValStore(StoreSDNode *ST); SDValue TransformFPLoadStorePair(SDNode *N); + SDValue convertBuildVecZextToZext(SDNode *N); SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode *N); SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx); - SDValue matchVSelectOpSizesWithSetCC(SDNode *N); + SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast); /// Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. @@ -500,15 +513,15 @@ namespace { bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, EVT LoadResultTy, EVT &ExtVT); - /// Helper function to calculate whether the given Load can have its + /// Helper function to calculate whether the given Load/Store can have its /// width reduced to ExtVT. - bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType, - EVT &ExtVT, unsigned ShAmt = 0); + bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType, + EVT &MemVT, unsigned ShAmt = 0); /// Used by BackwardsPropagateMask to find suitable loads. bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads, - SmallPtrSetImpl<SDNode*> &NodeWithConsts, - ConstantSDNode *Mask, SDNode *&UncombinedNode); + SmallPtrSetImpl<SDNode*> &NodesWithConsts, + ConstantSDNode *Mask, SDNode *&NodeToMask); /// Attempt to propagate a given AND node back to load leaves so that they /// can be combined into narrow loads. bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG); @@ -530,23 +543,28 @@ namespace { /// This is a helper function for MergeConsecutiveStores. Stores /// that potentially may be merged with St are placed in - /// StoreNodes. + /// StoreNodes. RootNode is a chain predecessor to all store + /// candidates. void getStoreMergeCandidates(StoreSDNode *St, - SmallVectorImpl<MemOpLink> &StoreNodes); + SmallVectorImpl<MemOpLink> &StoreNodes, + SDNode *&Root); /// Helper function for MergeConsecutiveStores. Checks if /// candidate stores have indirect dependency through their - /// operands. \return True if safe to merge. + /// operands. RootNode is the predecessor to all stores calculated + /// by getStoreMergeCandidates and is used to prune the dependency check. + /// \return True if safe to merge. 
bool checkMergeStoreCandidatesForDependencies( - SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores); + SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores, + SDNode *RootNode); /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return number of stores that were merged into a merged store (the /// affected nodes are stored as a prefix in \p StoreNodes). - bool MergeConsecutiveStores(StoreSDNode *N); + bool MergeConsecutiveStores(StoreSDNode *St); - /// \brief Try to transform a truncation where C is a constant: + /// Try to transform a truncation where C is a constant: /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) /// /// \p N needs to be a truncation and its first operand an AND. Other @@ -554,6 +572,16 @@ namespace { /// single-use) and if missed an empty SDValue is returned. SDValue distributeTruncateThroughAnd(SDNode *N); + /// Helper function to determine whether the target supports operation + /// given by \p Opcode for type \p VT, that is, whether the operation + /// is legal or custom before legalizing operations, and whether is + /// legal (but not custom) after legalization. + bool hasOperation(unsigned Opcode, EVT VT) { + if (LegalOperations) + return TLI.isOperationLegal(Opcode, VT); + return TLI.isOperationLegalOrCustom(Opcode, VT); + } + public: /// Runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); @@ -564,11 +592,7 @@ namespace { /// legalization these can be huge. EVT getShiftAmountTy(EVT LHSTy) { assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); - if (LHSTy.isVector()) - return LHSTy; - auto &DL = DAG.getDataLayout(); - return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy) - : TLI.getPointerTy(DL); + return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes); } /// This method returns true if we are running before type legalization or @@ -582,6 +606,10 @@ namespace { EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } + + void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, + SDValue OrigLoad, SDValue ExtLoad, + ISD::NodeType ExtType); }; /// This class is a DAGUpdateListener that removes any deleted @@ -657,8 +685,13 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, // fneg is removable even if it has multiple uses. if (Op.getOpcode() == ISD::FNEG) return 2; - // Don't allow anything with multiple uses. - if (!Op.hasOneUse()) return 0; + // Don't allow anything with multiple uses unless we know it is free. + EVT VT = Op.getValueType(); + const SDNodeFlags Flags = Op->getFlags(); + if (!Op.hasOneUse()) + if (!(Op.getOpcode() == ISD::FP_EXTEND && + TLI.isFPExtFree(VT, Op.getOperand(0).getValueType()))) + return 0; // Don't recurse exponentially. if (Depth > 6) return 0; @@ -671,17 +704,15 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, // Don't invert constant FP values after legalization unless the target says // the negated constant is legal. - EVT VT = Op.getValueType(); return TLI.isOperationLegal(ISD::ConstantFP, VT) || TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT); } case ISD::FADD: - // FIXME: determine better conditions for this xform. - if (!Options->UnsafeFPMath) return 0; + if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros()) + return 0; // After operation legalization, it might not be legal to create new FSUBs. 
- if (LegalOperations && - !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) + if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) return 0; // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) @@ -694,7 +725,7 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. if (!Options->NoSignedZerosFPMath && - !Op.getNode()->getFlags().hasNoSignedZeros()) + !Flags.hasNoSignedZeros()) return 0; // fold (fneg (fsub A, B)) -> (fsub B, A) @@ -702,8 +733,6 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, case ISD::FMUL: case ISD::FDIV: - if (Options->HonorSignDependentRoundingFPMath()) return 0; - // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, Depth + 1)) @@ -727,9 +756,6 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, // fneg is removable even if it has multiple uses. if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); - // Don't allow anything with multiple uses. - assert(Op.hasOneUse() && "Unknown reuse!"); - assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); const SDNodeFlags Flags = Op.getNode()->getFlags(); @@ -742,8 +768,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); } case ISD::FADD: - // FIXME: determine better conditions for this xform. - assert(Options.UnsafeFPMath); + assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros()); // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, @@ -769,8 +794,6 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, case ISD::FMUL: case ISD::FDIV: - assert(!Options.HonorSignDependentRoundingFPMath()); - // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, DAG.getTargetLoweringInfo(), &Options, Depth+1)) @@ -846,7 +869,13 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const { return false; } -// \brief Returns the SDNode if it is a constant float BuildVector +static SDValue peekThroughBitcast(SDValue V) { + while (V.getOpcode() == ISD::BITCAST) + V = V.getOperand(0); + return V; +} + +// Returns the SDNode if it is a constant float BuildVector // or constant float. static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { if (isa<ConstantFPSDNode>(N)) @@ -880,6 +909,7 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) { // constant null integer (with no undefs). // Build vector implicit truncation is not an issue for null values. static bool isNullConstantOrNullSplatConstant(SDValue N) { + // TODO: may want to use peekThroughBitcast() here. if (ConstantSDNode *Splat = isConstOrConstSplat(N)) return Splat->isNullValue(); return false; @@ -889,6 +919,7 @@ static bool isNullConstantOrNullSplatConstant(SDValue N) { // constant integer of one (with no undefs). // Do not permit build vector implicit truncation. static bool isOneConstantOrOneSplatConstant(SDValue N) { + // TODO: may want to use peekThroughBitcast() here. unsigned BitWidth = N.getScalarValueSizeInBits(); if (ConstantSDNode *Splat = isConstOrConstSplat(N)) return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth; @@ -899,6 +930,7 @@ static bool isOneConstantOrOneSplatConstant(SDValue N) { // constant integer of all ones (with no undefs). 
// Do not permit build vector implicit truncation. static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) { + N = peekThroughBitcast(N); unsigned BitWidth = N.getScalarValueSizeInBits(); if (ConstantSDNode *Splat = isConstOrConstSplat(N)) return Splat->isAllOnesValue() && @@ -913,56 +945,6 @@ static bool isAnyConstantBuildVector(const SDNode *N) { ISD::isBuildVectorOfConstantFPSDNodes(N); } -// Attempt to match a unary predicate against a scalar/splat constant or -// every element of a constant BUILD_VECTOR. -static bool matchUnaryPredicate(SDValue Op, - std::function<bool(ConstantSDNode *)> Match) { - if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) - return Match(Cst); - - if (ISD::BUILD_VECTOR != Op.getOpcode()) - return false; - - EVT SVT = Op.getValueType().getScalarType(); - for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { - auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i)); - if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst)) - return false; - } - return true; -} - -// Attempt to match a binary predicate against a pair of scalar/splat constants -// or every element of a pair of constant BUILD_VECTORs. -static bool matchBinaryPredicate( - SDValue LHS, SDValue RHS, - std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) { - if (LHS.getValueType() != RHS.getValueType()) - return false; - - if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS)) - if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS)) - return Match(LHSCst, RHSCst); - - if (ISD::BUILD_VECTOR != LHS.getOpcode() || - ISD::BUILD_VECTOR != RHS.getOpcode()) - return false; - - EVT SVT = LHS.getValueType().getScalarType(); - for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { - auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i)); - auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i)); - if (!LHSCst || !RHSCst) - return false; - if (LHSCst->getValueType(0) != SVT || - LHSCst->getValueType(0) != RHSCst->getValueType(0)) - return false; - if (!Match(LHSCst, RHSCst)) - return false; - } - return true; -} - SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); @@ -1013,11 +995,9 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo) { assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); ++NodesCombined; - DEBUG(dbgs() << "\nReplacing.1 "; - N->dump(&DAG); - dbgs() << "\nWith: "; - To[0].getNode()->dump(&DAG); - dbgs() << " and " << NumTo-1 << " other values\n"); + LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: "; + To[0].getNode()->dump(&DAG); + dbgs() << " and " << NumTo - 1 << " other values\n"); for (unsigned i = 0, e = NumTo; i != e; ++i) assert((!To[i].getNode() || N->getValueType(i) == To[i].getValueType()) && @@ -1074,11 +1054,33 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { // Replace the old value with the new one. ++NodesCombined; - DEBUG(dbgs() << "\nReplacing.2 "; - TLO.Old.getNode()->dump(&DAG); - dbgs() << "\nWith: "; - TLO.New.getNode()->dump(&DAG); - dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); + dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); + dbgs() << '\n'); + + CommitTargetLoweringOpt(TLO); + return true; +} + +/// Check the specified vector node value to see if it can be simplified or +/// if things it uses can be simplified as it only uses some of the elements. +/// If so, return true. 
+bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded, + bool AssumeSingleUse) { + TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); + APInt KnownUndef, KnownZero; + if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO, + 0, AssumeSingleUse)) + return false; + + // Revisit the node. + AddToWorklist(Op.getNode()); + + // Replace the old value with the new one. + ++NodesCombined; + LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); + dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); + dbgs() << '\n'); CommitTargetLoweringOpt(TLO); return true; @@ -1089,11 +1091,8 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { EVT VT = Load->getValueType(0); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0)); - DEBUG(dbgs() << "\nReplacing.9 "; - Load->dump(&DAG); - dbgs() << "\nWith: "; - Trunc.getNode()->dump(&DAG); - dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: "; + Trunc.getNode()->dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); @@ -1107,10 +1106,8 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { if (ISD::isUNINDEXEDLoad(Op.getNode())) { LoadSDNode *LD = cast<LoadSDNode>(Op); EVT MemVT = LD->getMemoryVT(); - ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD - : ISD::EXTLOAD) - : LD->getExtensionType(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD + : LD->getExtensionType(); Replace = true; return DAG.getExtLoad(ExtType, DL, PVT, LD->getChain(), LD->getBasePtr(), @@ -1194,7 +1191,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); - DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); bool Replace0 = false; SDValue N0 = Op.getOperand(0); @@ -1259,7 +1256,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); - DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); bool Replace = false; SDValue N0 = Op.getOperand(0); @@ -1311,8 +1308,7 @@ SDValue DAGCombiner::PromoteExtend(SDValue Op) { // fold (aext (aext x)) -> (aext x) // fold (aext (zext x)) -> (zext x) // fold (aext (sext x)) -> (sext x) - DEBUG(dbgs() << "\nPromoting "; - Op.getNode()->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); } return SDValue(); @@ -1345,20 +1341,15 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { SDNode *N = Op.getNode(); LoadSDNode *LD = cast<LoadSDNode>(N); EVT MemVT = LD->getMemoryVT(); - ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD - : ISD::EXTLOAD) - : LD->getExtensionType(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? 
ISD::EXTLOAD + : LD->getExtensionType(); SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT, LD->getChain(), LD->getBasePtr(), MemVT, LD->getMemOperand()); SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD); - DEBUG(dbgs() << "\nPromoting "; - N->dump(&DAG); - dbgs() << "\nTo: "; - Result.getNode()->dump(&DAG); - dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: "; + Result.getNode()->dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); @@ -1369,7 +1360,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { return false; } -/// \brief Recursively delete a node which has no uses and any operands for +/// Recursively delete a node which has no uses and any operands for /// which it is the only use. /// /// Note that this both deletes the nodes and removes them from the worklist. @@ -1453,7 +1444,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { continue; } - DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); // Add any operands of the new node which have not yet been combined to the // worklist as well. Because the worklist uniques things already, this @@ -1481,8 +1472,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { RV.getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); - DEBUG(dbgs() << " ... into: "; - RV.getNode()->dump(&DAG)); + LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG)); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode()); @@ -1558,7 +1548,6 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::VSELECT: return visitVSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); case ISD::SETCC: return visitSETCC(N); - case ISD::SETCCE: return visitSETCCE(N); case ISD::SETCCCARRY: return visitSETCCCARRY(N); case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); @@ -1708,6 +1697,10 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { return N->getOperand(1); } + // Don't simplify token factors if optnone. + if (OptLevel == CodeGenOpt::None) + return SDValue(); + SmallVector<SDNode *, 8> TFs; // List of token factors to visit. SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. SmallPtrSet<SDNode*, 16> SeenOps; @@ -1893,16 +1886,16 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) && "Unexpected binary operator"); - // Bail out if any constants are opaque because we can't constant fold those. - SDValue C1 = BO->getOperand(1); - if (!isConstantOrConstantVector(C1, true) && - !isConstantFPBuildVectorOrConstantFP(C1)) - return SDValue(); - // Don't do this unless the old select is going away. We want to eliminate the // binary operator, not replace a binop with a select. // TODO: Handle ISD::SELECT_CC. + unsigned SelOpNo = 0; SDValue Sel = BO->getOperand(0); + if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) { + SelOpNo = 1; + Sel = BO->getOperand(1); + } + if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) return SDValue(); @@ -1916,19 +1909,48 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { !isConstantFPBuildVectorOrConstantFP(CF)) return SDValue(); + // Bail out if any constants are opaque because we can't constant fold those. 
+ // The exception is "and" and "or" with either 0 or -1 in which case we can + // propagate non constant operands into select. I.e.: + // and (select Cond, 0, -1), X --> select Cond, 0, X + // or X, (select Cond, -1, 0) --> select Cond, -1, X + bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) && + (isNullConstantOrNullSplatConstant(CT) || + isAllOnesConstantOrAllOnesSplatConstant(CT)) && + (isNullConstantOrNullSplatConstant(CF) || + isAllOnesConstantOrAllOnesSplatConstant(CF)); + + SDValue CBO = BO->getOperand(SelOpNo ^ 1); + if (!CanFoldNonConst && + !isConstantOrConstantVector(CBO, true) && + !isConstantFPBuildVectorOrConstantFP(CBO)) + return SDValue(); + + EVT VT = Sel.getValueType(); + + // In case of shift value and shift amount may have different VT. For instance + // on x86 shift amount is i8 regardles of LHS type. Bail out if we have + // swapped operands and value types do not match. NB: x86 is fine if operands + // are not swapped with shift amount VT being not bigger than shifted value. + // TODO: that is possible to check for a shift operation, correct VTs and + // still perform optimization on x86 if needed. + if (SelOpNo && VT != CBO.getValueType()) + return SDValue(); + // We have a select-of-constants followed by a binary operator with a // constant. Eliminate the binop by pulling the constant math into the select. - // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1 - EVT VT = Sel.getValueType(); + // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO SDLoc DL(Sel); - SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1); - if (!NewCT.isUndef() && + SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT) + : DAG.getNode(BinOpcode, DL, VT, CT, CBO); + if (!CanFoldNonConst && !NewCT.isUndef() && !isConstantOrConstantVector(NewCT, true) && !isConstantFPBuildVectorOrConstantFP(NewCT)) return SDValue(); - SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1); - if (!NewCF.isUndef() && + SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF) + : DAG.getNode(BinOpcode, DL, VT, CF, CBO); + if (!CanFoldNonConst && !NewCF.isUndef() && !isConstantOrConstantVector(NewCF, true) && !isConstantFPBuildVectorOrConstantFP(NewCF)) return SDValue(); @@ -1936,6 +1958,84 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF); } +static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) { + assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && + "Expecting add or sub"); + + // Match a constant operand and a zext operand for the math instruction: + // add Z, C + // sub C, Z + bool IsAdd = N->getOpcode() == ISD::ADD; + SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0); + SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1); + auto *CN = dyn_cast<ConstantSDNode>(C); + if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND) + return SDValue(); + + // Match the zext operand as a setcc of a boolean. + if (Z.getOperand(0).getOpcode() != ISD::SETCC || + Z.getOperand(0).getValueType() != MVT::i1) + return SDValue(); + + // Match the compare as: setcc (X & 1), 0, eq. 
+ SDValue SetCC = Z.getOperand(0); + ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get(); + if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) || + SetCC.getOperand(0).getOpcode() != ISD::AND || + !isOneConstant(SetCC.getOperand(0).getOperand(1))) + return SDValue(); + + // We are adding/subtracting a constant and an inverted low bit. Turn that + // into a subtract/add of the low bit with incremented/decremented constant: + // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1)) + // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1)) + EVT VT = C.getValueType(); + SDLoc DL(N); + SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT); + SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) : + DAG.getConstant(CN->getAPIntValue() - 1, DL, VT); + return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit); +} + +/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into +/// a shift and add with a different constant. +static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { + assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && + "Expecting add or sub"); + + // We need a constant operand for the add/sub, and the other operand is a + // logical shift right: add (srl), C or sub C, (srl). + bool IsAdd = N->getOpcode() == ISD::ADD; + SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0); + SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1); + ConstantSDNode *C = isConstOrConstSplat(ConstantOp); + if (!C || ShiftOp.getOpcode() != ISD::SRL) + return SDValue(); + + // The shift must be of a 'not' value. + // TODO: Use isBitwiseNot() if it works with vectors. + SDValue Not = ShiftOp.getOperand(0); + if (!Not.hasOneUse() || Not.getOpcode() != ISD::XOR || + !isAllOnesConstantOrAllOnesSplatConstant(Not.getOperand(1))) + return SDValue(); + + // The shift must be moving the sign bit to the least-significant-bit. + EVT VT = ShiftOp.getValueType(); + SDValue ShAmt = ShiftOp.getOperand(1); + ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); + if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1) + return SDValue(); + + // Eliminate the 'not' by adjusting the shift and add/sub constant: + // add (srl (not X), 31), C --> add (sra X, 31), (C + 1) + // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1) + SDLoc DL(N); + auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL; + SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt); + APInt NewC = IsAdd ? 
C->getAPIntValue() + 1 : C->getAPIntValue() - 1; + return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT)); +} + SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2067,6 +2167,12 @@ SDValue DAGCombiner::visitADD(SDNode *N) { DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); } + if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) + return V; + + if (SDValue V = foldAddSubOfSignBit(N, DAG)) + return V; + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); @@ -2075,6 +2181,11 @@ SDValue DAGCombiner::visitADD(SDNode *N) { DAG.haveNoCommonBitsSet(N0, N1)) return DAG.getNode(ISD::OR, DL, VT, N0, N1); + // fold (add (xor a, -1), 1) -> (sub 0, a) + if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), + N0.getOperand(0)); + if (SDValue Combined = visitADDLike(N0, N1, N)) return Combined; @@ -2210,6 +2321,38 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { return SDValue(); } +static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT, + SelectionDAG &DAG, const TargetLowering &TLI) { + SDValue Cst; + switch (TLI.getBooleanContents(VT)) { + case TargetLowering::ZeroOrOneBooleanContent: + case TargetLowering::UndefinedBooleanContent: + Cst = DAG.getConstant(1, DL, VT); + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + Cst = DAG.getConstant(-1, DL, VT); + break; + } + + return DAG.getNode(ISD::XOR, DL, VT, V, Cst); +} + +static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) { + if (V.getOpcode() != ISD::XOR) return false; + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1)); + if (!Const) return false; + + switch(TLI.getBooleanContents(VT)) { + case TargetLowering::ZeroOrOneBooleanContent: + return Const->isOne(); + case TargetLowering::ZeroOrNegativeOneBooleanContent: + return Const->isAllOnesValue(); + case TargetLowering::UndefinedBooleanContent: + return (Const->getAPIntValue() & 0x01) == 1; + } + llvm_unreachable("Unsupported boolean content"); +} + SDValue DAGCombiner::visitUADDO(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2240,6 +2383,15 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) { return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), DAG.getConstant(0, DL, CarryVT)); + // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry. + if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) { + SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(), + DAG.getConstant(0, DL, VT), + N0.getOperand(0)); + return CombineTo(N, Sub, + flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI)); + } + if (SDValue Combined = visitUADDOLike(N0, N1, N)) return Combined; @@ -2303,13 +2455,17 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) { return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn); // fold (addcarry x, y, false) -> (uaddo x, y) - if (isNullConstant(CarryIn)) - return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1); + if (isNullConstant(CarryIn)) { + if (!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0))) + return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1); + } + + EVT CarryVT = CarryIn.getValueType(); // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry. 
if (isNullConstant(N0) && isNullConstant(N1)) { EVT VT = N0.getValueType(); - EVT CarryVT = CarryIn.getValueType(); SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT); AddToWorklist(CarryExt.getNode()); return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt, @@ -2317,6 +2473,16 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) { DAG.getConstant(0, DL, CarryVT)); } + // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry. + if (isBitwiseNot(N0) && isNullConstant(N1) && + isBooleanFlip(CarryIn, CarryVT, TLI)) { + SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), + DAG.getConstant(0, DL, N0.getValueType()), + N0.getOperand(0), CarryIn.getOperand(0)); + return CombineTo(N, Sub, + flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI)); + } + if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N)) return Combined; @@ -2458,6 +2624,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (isAllOnesConstantOrAllOnesSplatConstant(N0)) return DAG.getNode(ISD::XOR, DL, VT, N1, N0); + // fold (A - (0-B)) -> A+B + if (N1.getOpcode() == ISD::SUB && + isNullConstantOrNullSplatConstant(N1.getOperand(0))) + return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1)); + // fold A-(A-B) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) return N1.getOperand(1); @@ -2500,12 +2671,50 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N0.getOperand(1).getOperand(0)); + // fold (X - (-Y * Z)) -> (X + (Y * Z)) + if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) { + if (N1.getOperand(0).getOpcode() == ISD::SUB && + isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) { + SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, + N1.getOperand(0).getOperand(1), + N1.getOperand(1)); + return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); + } + if (N1.getOperand(1).getOpcode() == ISD::SUB && + isNullConstantOrNullSplatConstant(N1.getOperand(1).getOperand(0))) { + SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, + N1.getOperand(0), + N1.getOperand(1).getOperand(1)); + return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); + } + } + // If either operand of a sub is undef, the result is undef if (N0.isUndef()) return N0; if (N1.isUndef()) return N1; + if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) + return V; + + if (SDValue V = foldAddSubOfSignBit(N, DAG)) + return V; + + // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X) + if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { + if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) { + SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1); + SDValue S0 = N1.getOperand(0); + if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) { + unsigned OpSizeInBits = VT.getScalarSizeInBits(); + if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1))) + if (C->getAPIntValue() == (OpSizeInBits - 1)) + return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0); + } + } + } + // If the relocation model supports it, consider symbol offsets. 
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { @@ -2612,8 +2821,11 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) { SDValue CarryIn = N->getOperand(2); // fold (subcarry x, y, false) -> (usubo x, y) - if (isNullConstant(CarryIn)) - return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1); + if (isNullConstant(CarryIn)) { + if (!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0))) + return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1); + } return SDValue(); } @@ -2689,11 +2901,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { (!VT.isVector() || Level <= AfterLegalizeVectorOps)) { SDLoc DL(N); SDValue LogBase2 = BuildLogBase2(N1, DL); - AddToWorklist(LogBase2.getNode()); - EVT ShiftVT = getShiftAmountTy(N0.getValueType()); SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); - AddToWorklist(Trunc.getNode()); return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc); } // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c @@ -2816,9 +3025,10 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) { SDValue Op1 = Node->getOperand(1); SDValue combined; for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), - UE = Op0.getNode()->use_end(); UI != UE;) { - SDNode *User = *UI++; - if (User == Node || User->use_empty()) + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User == Node || User->getOpcode() == ISD::DELETED_NODE || + User->use_empty()) continue; // Convert the other matching node(s), too; // otherwise, the DIVREM may get target-legalized into something @@ -2868,6 +3078,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); + EVT CCVT = getSetCCResultType(VT); // fold vector ops if (VT.isVector()) @@ -2887,6 +3098,11 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // fold (sdiv X, -1) -> 0-X if (N1C && N1C->isAllOnesValue()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); + // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0) + if (N1C && N1C->getAPIntValue().isMinSignedValue()) + return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), + DAG.getConstant(1, DL, VT), + DAG.getConstant(0, DL, VT)); if (SDValue V = simplifyDivRem(N, DAG)) return V; @@ -2899,45 +3115,90 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); + if (SDValue V = visitSDIVLike(N0, N1, N)) + return V; + + // sdiv, srem -> sdivrem + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is + // true. Otherwise, we break the simplification logic in visitREM(). + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue DivRem = useDivRem(N)) + return DivRem; + + return SDValue(); +} + +SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + EVT CCVT = getSetCCResultType(VT); + unsigned BitWidth = VT.getScalarSizeInBits(); + + ConstantSDNode *N1C = isConstOrConstSplat(N1); + + // Helper for determining whether a value is a power-2 constant scalar or a + // vector of such elements. 
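
The two new constant-divisor folds in visitSDIV can be spot-checked outside the DAG; the select form for INT_MIN works because only INT_MIN itself divides to 1 and every other dividend truncates to 0. Illustrative standalone C++:

#include <cassert>
#include <cstdint>
#include <limits>

int main() {
  const int32_t Min = std::numeric_limits<int32_t>::min();
  const int32_t Max = std::numeric_limits<int32_t>::max();
  const int32_t Samples[] = {0, 1, -1, 42, -42, Min, Min + 1, Max};
  for (int32_t X : Samples) {
    // sdiv X, -1 -> sub 0, X (X == INT_MIN is skipped: X / -1 overflows in C).
    if (X != Min)
      assert(X / -1 == 0 - X);
    // sdiv X, INT_MIN -> select (X == INT_MIN), 1, 0
    assert(X / Min == ((X == Min) ? 1 : 0));
  }
  return 0;
}
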
+ auto IsPowerOfTwo = [](ConstantSDNode *C) { + if (C->isNullValue() || C->isOpaque()) + return false; + if (C->getAPIntValue().isPowerOf2()) + return true; + if ((-C->getAPIntValue()).isPowerOf2()) + return true; + return false; + }; + // fold (sdiv X, pow2) -> simple ops after legalize // FIXME: We check for the exact bit here because the generic lowering gives // better results in that case. The target-specific lowering should learn how // to handle exact sdivs efficiently. - if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && - !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() || - (-N1C->getAPIntValue()).isPowerOf2())) { + if (!N->getFlags().hasExact() && + ISD::matchUnaryPredicate(N1C ? SDValue(N1C, 0) : N1, IsPowerOfTwo)) { // Target-specific implementation of sdiv x, pow2. if (SDValue Res = BuildSDIVPow2(N)) return Res; - unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); + // Create constants that are functions of the shift amount value. + EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType()); + SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy); + SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1); + C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy); + SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1); + if (!isConstantOrConstantVector(Inexact)) + return SDValue(); // Splat the sign bit into the register - SDValue SGN = - DAG.getNode(ISD::SRA, DL, VT, N0, - DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, - getShiftAmountTy(N0.getValueType()))); - AddToWorklist(SGN.getNode()); + SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0, + DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy)); + AddToWorklist(Sign.getNode()); // Add (N0 < 0) ? abs2 - 1 : 0; - SDValue SRL = - DAG.getNode(ISD::SRL, DL, VT, SGN, - DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL, - getShiftAmountTy(SGN.getValueType()))); - SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL); - AddToWorklist(SRL.getNode()); - AddToWorklist(ADD.getNode()); // Divide by pow2 - SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD, - DAG.getConstant(lg2, DL, - getShiftAmountTy(ADD.getValueType()))); - - // If we're dividing by a positive value, we're done. Otherwise, we must - // negate the result. - if (N1C->getAPIntValue().isNonNegative()) - return SRA; - - AddToWorklist(SRA.getNode()); - return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); + SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact); + AddToWorklist(Srl.getNode()); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl); + AddToWorklist(Add.getNode()); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1); + AddToWorklist(Sra.getNode()); + + // Special case: (sdiv X, 1) -> X + // Special Case: (sdiv X, -1) -> 0-X + SDValue One = DAG.getConstant(1, DL, VT); + SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); + SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ); + SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ); + SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes); + Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra); + + // If dividing by a positive value, we're done. Otherwise, the result must + // be negated. + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra); + + // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding. 
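
The shift sequence built below generalizes the old scalar lg2 code: splat the sign bit, add a bias of 2^k - 1 for negative dividends, then arithmetic-shift right by k. A standalone model of the positive-divisor path only (the patch additionally selects X for |d| == 1 and negates the result for negative divisors):

#include <cassert>
#include <cstdint>

// sdiv X, 2^K for K >= 1, using shifts; assumes arithmetic >> for signed.
static int32_t SDivPow2(int32_t X, unsigned K) {
  int32_t Sign = X >> 31;                                           // sign splat
  int32_t Bias = static_cast<int32_t>(static_cast<uint32_t>(Sign) >> (32 - K));
  return (X + Bias) >> K;                                           // round toward zero
}

int main() {
  const int32_t Samples[] = {0, 1, -1, 5, -5, 100, -100, 123456, -123456};
  for (int32_t X : Samples)
    for (unsigned K = 1; K <= 8; ++K)
      assert(SDivPow2(X, K) == X / (int32_t(1) << K));
  return 0;
}
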
+ SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT); + SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra); + return Res; } // If integer divide is expensive and we satisfy the requirements, emit an @@ -2948,13 +3209,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { if (SDValue Op = BuildSDIV(N)) return Op; - // sdiv, srem -> sdivrem - // If the divisor is constant, then return DIVREM only if isIntDivCheap() is - // true. Otherwise, we break the simplification logic in visitREM(). - if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) - if (SDValue DivRem = useDivRem(N)) - return DivRem; - return SDValue(); } @@ -2962,6 +3216,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); + EVT CCVT = getSetCCResultType(VT); // fold vector ops if (VT.isVector()) @@ -2977,6 +3232,14 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, N0C, N1C)) return Folded; + // fold (udiv X, 1) -> X + if (N1C && N1C->isOne()) + return N0; + // fold (udiv X, -1) -> select(X == -1, 1, 0) + if (N1C && N1C->getAPIntValue().isAllOnesValue()) + return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), + DAG.getConstant(1, DL, VT), + DAG.getConstant(0, DL, VT)); if (SDValue V = simplifyDivRem(N, DAG)) return V; @@ -2984,6 +3247,26 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; + if (SDValue V = visitUDIVLike(N0, N1, N)) + return V; + + // sdiv, srem -> sdivrem + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is + // true. Otherwise, we break the simplification logic in visitREM(). + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue DivRem = useDivRem(N)) + return DivRem; + + return SDValue(); +} + +SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + ConstantSDNode *N1C = isConstOrConstSplat(N1); + // fold (udiv x, (1 << c)) -> x >>u c if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && DAG.isKnownToBeAPowerOfTwo(N1)) { @@ -3019,13 +3302,6 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (SDValue Op = BuildUDIV(N)) return Op; - // sdiv, srem -> sdivrem - // If the divisor is constant, then return DIVREM only if isIntDivCheap() is - // true. Otherwise, we break the simplification logic in visitREM(). - if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) - if (SDValue DivRem = useDivRem(N)) - return DivRem; - return SDValue(); } @@ -3035,6 +3311,8 @@ SDValue DAGCombiner::visitREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); + EVT CCVT = getSetCCResultType(VT); + bool isSigned = (Opcode == ISD::SREM); SDLoc DL(N); @@ -3044,6 +3322,10 @@ SDValue DAGCombiner::visitREM(SDNode *N) { if (N0C && N1C) if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C)) return Folded; + // fold (urem X, -1) -> select(X == -1, 0, x) + if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue()) + return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), + DAG.getConstant(0, DL, VT), N0); if (SDValue V = simplifyDivRem(N, DAG)) return V; @@ -3077,22 +3359,19 @@ SDValue DAGCombiner::visitREM(SDNode *N) { // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. 
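
visitREM below recovers the remainder from the simplified quotient via X - (X/C)*C, and the new udiv/urem-by-all-ones folds reduce to a compare-and-select. A standalone check of both:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t AllOnes = 0xFFFFFFFFu;
  const uint32_t Xs[] = {0u, 1u, 7u, 100u, 0x80000000u, AllOnes};
  const uint32_t Cs[] = {1u, 2u, 3u, 7u, 10u, 0x8000u, AllOnes};
  for (uint32_t X : Xs) {
    // urem X, C == X - (X / C) * C
    for (uint32_t C : Cs)
      assert(X % C == X - (X / C) * C);
    // udiv X, -1 -> select (X == -1), 1, 0
    assert(X / AllOnes == ((X == AllOnes) ? 1u : 0u));
    // urem X, -1 -> select (X == -1), 0, X
    assert(X % AllOnes == ((X == AllOnes) ? 0u : X));
  }
  return 0;
}
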
- // To avoid mangling nodes, this simplification requires that the combine() - // call for the speculative DIV must not cause a DIVREM conversion. We guard - // against this by skipping the simplification if isIntDivCheap(). When - // div is not cheap, combine will not return a DIVREM. Regardless, - // checking cheapness here makes sense since the simplification results in - // fatter code. - if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) { - unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV; - SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1); - AddToWorklist(Div.getNode()); - SDValue OptimizedDiv = combine(Div.getNode()); - if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { - assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) && - (OptimizedDiv.getOpcode() != ISD::SDIVREM)); + // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the + // speculative DIV must not cause a DIVREM conversion. We guard against this + // by skipping the simplification if isIntDivCheap(). When div is not cheap, + // combine will not return a DIVREM. Regardless, checking cheapness here + // makes sense since the simplification results in fatter code. + if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) { + SDValue OptimizedDiv = + isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N); + if (OptimizedDiv.getNode() && OptimizedDiv.getOpcode() != ISD::UDIVREM && + OptimizedDiv.getOpcode() != ISD::SDIVREM) { SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1); SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); + AddToWorklist(OptimizedDiv.getNode()); AddToWorklist(Mul.getNode()); return Sub; } @@ -3350,6 +3629,25 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); + // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX. + // Only do this if the current op isn't legal and the flipped is.
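
The min/max flip is sound because once both operands have a clear sign bit they compare identically under signed and unsigned order. Standalone check:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  // Values with the sign bit known clear, i.e. valid as non-negative int32.
  const uint32_t Vals[] = {0u, 1u, 2u, 100u, 0x7FFFFFFEu, 0x7FFFFFFFu};
  for (uint32_t A : Vals)
    for (uint32_t B : Vals) {
      int32_t SA = static_cast<int32_t>(A), SB = static_cast<int32_t>(B);
      assert(static_cast<uint32_t>(std::min(SA, SB)) == std::min(A, B));
      assert(static_cast<uint32_t>(std::max(SA, SB)) == std::max(A, B));
    }
  return 0;
}
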
+ unsigned Opcode = N->getOpcode(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isOperationLegal(Opcode, VT) && + (N0.isUndef() || DAG.SignBitIsZero(N0)) && + (N1.isUndef() || DAG.SignBitIsZero(N1))) { + unsigned AltOpcode; + switch (Opcode) { + case ISD::SMIN: AltOpcode = ISD::UMIN; break; + case ISD::SMAX: AltOpcode = ISD::UMAX; break; + case ISD::UMIN: AltOpcode = ISD::SMIN; break; + case ISD::UMAX: AltOpcode = ISD::SMAX; break; + default: llvm_unreachable("Unknown MINMAX opcode"); + } + if (TLI.isOperationLegal(AltOpcode, VT)) + return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1); + } + return SDValue(); } @@ -3469,9 +3767,9 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { ShOp = SDValue(); } - // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C) - // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C) - // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0) + // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C) + // (OR (shuf (A, C), shuf (B, C))) -> shuf (OR (A, B), C) + // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0) if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0->getOperand(0), N1->getOperand(0)); @@ -3490,9 +3788,9 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { ShOp = SDValue(); } - // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B)) - // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B)) - // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B)) + // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B)) + // (OR (shuf (C, A), shuf (C, B))) -> shuf (C, OR (A, B)) + // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B)) if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0->getOperand(1), N1->getOperand(1)); @@ -3525,7 +3823,7 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, // operations on the left and right operands, so those types must match. EVT VT = N0.getValueType(); EVT OpVT = LL.getValueType(); - if (LegalOperations || VT != MVT::i1) + if (LegalOperations || VT.getScalarType() != MVT::i1) if (VT != getSetCCResultType(OpVT)) return SDValue(); if (OpVT != RL.getValueType()) @@ -3762,53 +4060,78 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, return true; } -bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType, - EVT &ExtVT, unsigned ShAmt) { - // Don't transform one with multiple uses, this would require adding a new - // load. - if (!SDValue(LoadN, 0).hasOneUse()) +bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST, + ISD::LoadExtType ExtType, EVT &MemVT, + unsigned ShAmt) { + if (!LDST) return false; - - if (LegalOperations && - !TLI.isLoadExtLegal(ExtType, LoadN->getValueType(0), ExtVT)) + // Only allow byte offsets. + if (ShAmt % 8) return false; // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). - if (!ExtVT.isRound()) + if (!MemVT.isRound()) return false; // Don't change the width of a volatile load. - if (LoadN->isVolatile()) + if (LDST->isVolatile()) return false; // Verify that we are actually reducing a load width here. 
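
The reworded shuffle comments above describe a lane-wise commutation: shuffling both inputs with one common mask and then applying the bitwise op equals applying the op first and shuffling once. A simplified single-input model (illustrative only; the DAG fold additionally requires the second shuffle operands to match):

#include <array>
#include <cassert>
#include <cstdint>

int main() {
  const std::array<uint8_t, 4> A = {0xF0, 0x0F, 0xAA, 0x55};
  const std::array<uint8_t, 4> B = {0xFF, 0x33, 0xCC, 0x00};
  const std::array<int, 4> Mask = {2, 0, 3, 1};   // same mask on both sides

  auto Shuffle = [&](const std::array<uint8_t, 4> &V) {
    std::array<uint8_t, 4> R{};
    for (int i = 0; i != 4; ++i)
      R[i] = V[Mask[i]];
    return R;
  };

  std::array<uint8_t, 4> ShufA = Shuffle(A), ShufB = Shuffle(B);
  std::array<uint8_t, 4> LHS{}, AandB{};
  for (int i = 0; i != 4; ++i) {
    LHS[i] = ShufA[i] & ShufB[i];   // (and (shuf A), (shuf B))
    AandB[i] = A[i] & B[i];
  }
  assert(LHS == Shuffle(AandB));    // shuf (and A, B)
  return 0;
}
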
- if (LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits()) + if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits()) return false; - // For the transform to be legal, the load must produce only two values - // (the value loaded and the chain). Don't transform a pre-increment - // load, for example, which produces an extra value. Otherwise the - // transformation is not equivalent, and the downstream logic to replace - // uses gets things wrong. - if (LoadN->getNumValues() > 2) - return false; - - // If the load that we're shrinking is an extload and we're not just - // discarding the extension we can't simply shrink the load. Bail. - // TODO: It would be possible to merge the extensions in some cases. - if (LoadN->getExtensionType() != ISD::NON_EXTLOAD && - LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) - return false; - - if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT)) + // Ensure that this isn't going to produce an unsupported unaligned access. + if (ShAmt && + !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, + LDST->getAddressSpace(), ShAmt / 8)) return false; // It's not possible to generate a constant of extended or untyped type. - EVT PtrType = LoadN->getOperand(1).getValueType(); + EVT PtrType = LDST->getBasePtr().getValueType(); if (PtrType == MVT::Untyped || PtrType.isExtended()) return false; + if (isa<LoadSDNode>(LDST)) { + LoadSDNode *Load = cast<LoadSDNode>(LDST); + // Don't transform one with multiple uses, this would require adding a new + // load. + if (!SDValue(Load, 0).hasOneUse()) + return false; + + if (LegalOperations && + !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT)) + return false; + + // For the transform to be legal, the load must produce only two values + // (the value loaded and the chain). Don't transform a pre-increment + // load, for example, which produces an extra value. Otherwise the + // transformation is not equivalent, and the downstream logic to replace + // uses gets things wrong. + if (Load->getNumValues() > 2) + return false; + + // If the load that we're shrinking is an extload and we're not just + // discarding the extension we can't simply shrink the load. Bail. + // TODO: It would be possible to merge the extensions in some cases. + if (Load->getExtensionType() != ISD::NON_EXTLOAD && + Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt) + return false; + + if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT)) + return false; + } else { + assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode"); + StoreSDNode *Store = cast<StoreSDNode>(LDST); + // Can't write outside the original store + if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt) + return false; + + if (LegalOperations && + !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT)) + return false; + } return true; } @@ -3841,7 +4164,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, auto *Load = cast<LoadSDNode>(Op); EVT ExtVT; if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) && - isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) { + isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) { // ZEXTLOAD is already small enough. if (Load->getExtensionType() == ISD::ZEXTLOAD && @@ -3882,7 +4205,23 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, // Allow one node which will masked along with any loads found. if (NodeToMask) return false; + + // Also ensure that the node to be masked only produces one data result. 
NodeToMask = Op.getNode(); + if (NodeToMask->getNumValues() > 1) { + bool HasValue = false; + for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) { + MVT VT = SDValue(NodeToMask, i).getSimpleValueType(); + if (VT != MVT::Glue && VT != MVT::Other) { + if (HasValue) { + NodeToMask = nullptr; + return false; + } + HasValue = true; + } + } + assert(HasValue && "Node to be masked has no data result?"); + } } return true; } @@ -3906,19 +4245,19 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) { if (Loads.size() == 0) return false; - DEBUG(dbgs() << "Backwards propagate AND: "; N->dump()); + LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump()); SDValue MaskOp = N->getOperand(1); // If it exists, fixup the single node we allow in the tree that needs // masking. if (FixupNode) { - DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump()); + LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump()); SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode), FixupNode->getValueType(0), SDValue(FixupNode, 0), MaskOp); DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And); - DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), - MaskOp); + if (And.getOpcode() == ISD ::AND) + DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp); } // Narrow any constants that need it. @@ -3937,11 +4276,13 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) { // Create narrow loads. for (auto *Load : Loads) { - DEBUG(dbgs() << "Propagate AND back to: "; Load->dump()); + LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump()); SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0), SDValue(Load, 0), MaskOp); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And); - DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp); + if (And.getOpcode() == ISD ::AND) + And = SDValue( + DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0); SDValue NewLoad = ReduceLoadWidth(And.getNode()); assert(NewLoad && "Shouldn't be masking the load if it can't be narrowed"); @@ -3953,6 +4294,60 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) { return false; } +// Unfold +// x & (-1 'logical shift' y) +// To +// (x 'opposite logical shift' y) 'logical shift' y +// if it is better for performance. +SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) { + assert(N->getOpcode() == ISD::AND); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Do we actually prefer shifts over mask? + if (!TLI.preferShiftsToClearExtremeBits(N0)) + return SDValue(); + + // Try to match (-1 '[outer] logical shift' y) + unsigned OuterShift; + unsigned InnerShift; // The opposite direction to the OuterShift. + SDValue Y; // Shift amount. 
+ auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool { + if (!M.hasOneUse()) + return false; + OuterShift = M->getOpcode(); + if (OuterShift == ISD::SHL) + InnerShift = ISD::SRL; + else if (OuterShift == ISD::SRL) + InnerShift = ISD::SHL; + else + return false; + if (!isAllOnesConstant(M->getOperand(0))) + return false; + Y = M->getOperand(1); + return true; + }; + + SDValue X; + if (matchMask(N1)) + X = N0; + else if (matchMask(N0)) + X = N1; + else + return SDValue(); + + SDLoc DL(N); + EVT VT = N->getValueType(0); + + // tmp = x 'opposite logical shift' y + SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y); + // ret = tmp 'logical shift' y + SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y); + + return T1; +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4019,7 +4414,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue()); }; if (N0.getOpcode() == ISD::OR && - matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset)) + ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset)) return N1; // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { @@ -4250,6 +4645,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return BSwap; } + if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N)) + return Shifts; + return SDValue(); } @@ -4276,7 +4674,10 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, if (!N0.getNode()->hasOneUse()) return SDValue(); ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - if (!N01C || N01C->getZExtValue() != 0xFF00) + // Also handle 0xffff since the LHS is guaranteed to have zeros there. + // This is needed for X86. + if (!N01C || (N01C->getZExtValue() != 0xFF00 && + N01C->getZExtValue() != 0xFFFF)) return SDValue(); N0 = N0.getOperand(0); LookPassAnd0 = true; @@ -4323,7 +4724,10 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, if (!N10.getNode()->hasOneUse()) return SDValue(); ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1)); - if (!N101C || N101C->getZExtValue() != 0xFF00) + // Also allow 0xFFFF since the bits will be shifted out. This is needed + // for X86. + if (!N101C || (N101C->getZExtValue() != 0xFF00 && + N101C->getZExtValue() != 0xFFFF)) return SDValue(); N10 = N10.getOperand(0); LookPassAnd1 = true; @@ -4394,6 +4798,14 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { return false; case 0xFF: MaskByteOffset = 0; break; case 0xFF00: MaskByteOffset = 1; break; + case 0xFFFF: + // In case demanded bits didn't clear the bits that will be shifted out. + // This is needed for X86. 
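
The unfold matched above relies on two shift identities: x & (-1 << y) == (x >> y) << y and x & (-1 >> y) == (x << y) >> y. Standalone check over all 32-bit shift amounts:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Xs[] = {0u, 1u, 0xDEADBEEFu, 0x80000001u, 0xFFFFFFFFu};
  for (uint32_t X : Xs)
    for (unsigned Y = 0; Y < 32; ++Y) {
      assert((X & (0xFFFFFFFFu << Y)) == ((X >> Y) << Y));  // clear low Y bits
      assert((X & (0xFFFFFFFFu >> Y)) == ((X << Y) >> Y));  // clear high Y bits
    }
  return 0;
}
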
+ if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) { + MaskByteOffset = 1; + break; + } + return false; case 0xFF0000: MaskByteOffset = 2; break; case 0xFF000000: MaskByteOffset = 3; break; } @@ -4708,7 +5120,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return LHS->getAPIntValue().intersects(RHS->getAPIntValue()); }; if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && - matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) { + ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) { if (SDValue COR = DAG.FoldConstantArithmetic( ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) { SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1); @@ -4764,7 +5176,8 @@ bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate // in direction shift1 by Neg. The range [0, EltSize) means that we only need // to consider shift amounts with defined behavior. -static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { +static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, + SelectionDAG &DAG) { // If EltSize is a power of 2 then: // // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) @@ -4799,9 +5212,13 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { unsigned MaskLoBits = 0; if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) { - if (NegC->getAPIntValue() == EltSize - 1) { + KnownBits Known; + DAG.computeKnownBits(Neg.getOperand(0), Known); + unsigned Bits = Log2_64(EltSize); + if (NegC->getAPIntValue().getActiveBits() <= Bits && + ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) { Neg = Neg.getOperand(0); - MaskLoBits = Log2_64(EltSize); + MaskLoBits = Bits; } } } @@ -4816,10 +5233,16 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with // Pos'. The truncation is redundant for the purpose of the equality. - if (MaskLoBits && Pos.getOpcode() == ISD::AND) - if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) - if (PosC->getAPIntValue() == EltSize - 1) + if (MaskLoBits && Pos.getOpcode() == ISD::AND) { + if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) { + KnownBits Known; + DAG.computeKnownBits(Pos.getOperand(0), Known); + if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits && + ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >= + MaskLoBits)) Pos = Pos.getOperand(0); + } + } // The condition we need is now: // @@ -4875,7 +5298,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, // (srl x, (*ext y))) -> // (rotr x, y) or (rotl x, (sub 32, y)) EVT VT = Shifted.getValueType(); - if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) { + if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, HasPos ? Pos : Neg).getNode(); @@ -4893,8 +5316,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { if (!TLI.isTypeLegal(VT)) return nullptr; // The target must have at least one rotate flavor. 
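
The rotate matching that follows depends on rotl/rotr being interchangeable through negation of the amount modulo the element size, and on an or-of-complementary-shifts being a rotate. Standalone check for 32-bit elements:

#include <cassert>
#include <cstdint>

static uint32_t Rotl(uint32_t X, unsigned R) {
  R &= 31;
  return R ? (X << R) | (X >> (32 - R)) : X;
}
static uint32_t Rotr(uint32_t X, unsigned R) {
  R &= 31;
  return R ? (X >> R) | (X << (32 - R)) : X;
}

int main() {
  const uint32_t Xs[] = {0u, 1u, 0xDEADBEEFu, 0x80000000u, 0xFFFFFFFFu};
  for (uint32_t X : Xs)
    for (unsigned P = 0; P < 32; ++P) {
      // rotl by P == rotr by (32 - P) mod 32, the relation matchRotateSub checks.
      assert(Rotl(X, P) == Rotr(X, (32 - P) & 31));
      // (shl x, c) | (srl x, 32 - c) is a rotate when the amounts sum to 32.
      if (P != 0)
        assert(((X << P) | (X >> (32 - P))) == Rotl(X, P));
    }
  return 0;
}
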
- bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); - bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); + bool HasROTL = hasOperation(ISD::ROTL, VT); + bool HasROTR = hasOperation(ISD::ROTR, VT); if (!HasROTL && !HasROTR) return nullptr; // Check for truncated rotate. @@ -4943,7 +5366,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { ConstantSDNode *RHS) { return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits; }; - if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { + if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); @@ -5200,7 +5623,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { Optional<BaseIndexOffset> Base; SDValue Chain; - SmallSet<LoadSDNode *, 8> Loads; + SmallPtrSet<LoadSDNode *, 8> Loads; Optional<ByteProvider> FirstByteProvider; int64_t FirstOffset = INT64_MAX; @@ -5299,6 +5722,88 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad; } +// If the target has andn, bsl, or a similar bit-select instruction, +// we want to unfold masked merge, with canonical pattern of: +// | A | |B| +// ((x ^ y) & m) ^ y +// | D | +// Into: +// (x & m) | (y & ~m) +// If y is a constant, and the 'andn' does not work with immediates, +// we unfold into a different pattern: +// ~(~x & m) & (m | y) +// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at +// the very least that breaks andnpd / andnps patterns, and because those +// patterns are simplified in IR and shouldn't be created in the DAG +SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { + assert(N->getOpcode() == ISD::XOR); + + // Don't touch 'not' (i.e. where y = -1). + if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1))) + return SDValue(); + + EVT VT = N->getValueType(0); + + // There are 3 commutable operators in the pattern, + // so we have to deal with 8 possible variants of the basic pattern. + SDValue X, Y, M; + auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) { + if (And.getOpcode() != ISD::AND || !And.hasOneUse()) + return false; + SDValue Xor = And.getOperand(XorIdx); + if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse()) + return false; + SDValue Xor0 = Xor.getOperand(0); + SDValue Xor1 = Xor.getOperand(1); + // Don't touch 'not' (i.e. where y = -1). + if (isAllOnesConstantOrAllOnesSplatConstant(Xor1)) + return false; + if (Other == Xor0) + std::swap(Xor0, Xor1); + if (Other != Xor1) + return false; + X = Xor0; + Y = Xor1; + M = And.getOperand(XorIdx ? 0 : 1); + return true; + }; + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) && + !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0)) + return SDValue(); + + // Don't do anything if the mask is constant. This should not be reachable. + // InstCombine should have already unfolded this pattern, and DAGCombiner + // probably shouldn't produce it, too. + if (isa<ConstantSDNode>(M.getNode())) + return SDValue(); + + // We can transform if the target has AndNot + if (!TLI.hasAndNot(M)) + return SDValue(); + + SDLoc DL(N); + + // If Y is a constant, check that 'andn' works with immediates. + if (!TLI.hasAndNot(Y)) { + assert(TLI.hasAndNot(X) && "Only mask is a variable? 
Unreachable."); + // If not, we need to do a bit more work to make sure andn is still used. + SDValue NotX = DAG.getNOT(DL, X, VT); + SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M); + SDValue NotLHS = DAG.getNOT(DL, LHS, VT); + SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y); + return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS); + } + + SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M); + SDValue NotM = DAG.getNOT(DL, M, VT); + SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM); + + return DAG.getNode(ISD::OR, DL, VT, LHS, RHS); +} + SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5378,7 +5883,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc - if (isOneConstant(N1) && VT == MVT::i1 && + if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { @@ -5390,7 +5895,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants - if (isAllOnesConstant(N1) && + if (isAllOnesConstant(N1) && N0.hasOneUse() && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { @@ -5411,13 +5916,19 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X) - unsigned OpSizeInBits = VT.getScalarSizeInBits(); - if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 && - N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) && - TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { - if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1))) - if (C->getAPIntValue() == (OpSizeInBits - 1)) - return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0)); + if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { + SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1; + SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1; + if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) { + SDValue A0 = A.getOperand(0), A1 = A.getOperand(1); + SDValue S0 = S.getOperand(0); + if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) { + unsigned OpSizeInBits = VT.getScalarSizeInBits(); + if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1))) + if (C->getAPIntValue() == (OpSizeInBits - 1)) + return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0); + } + } } // fold (xor x, x) -> 0 @@ -5454,6 +5965,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) return Tmp; + // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable + if (SDValue MM = unfoldMaskedMerge(N)) + return MM; + // Simplify the expression using non-local knowledge. 
if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); @@ -5656,7 +6171,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) { return Val->getAPIntValue().uge(OpSizeInBits); }; - if (matchUnaryPredicate(N1, MatchShiftTooBig)) + if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig)) return DAG.getUNDEF(VT); // fold (shl x, 0) -> x if (N1C && N1C->isNullValue()) @@ -5691,7 +6206,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return (c1 + c2).uge(OpSizeInBits); }; - if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) return DAG.getConstant(0, SDLoc(N), VT); auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, @@ -5701,7 +6216,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return (c1 + c2).ult(OpSizeInBits); }; - if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { SDLoc DL(N); EVT ShiftVT = N1.getValueType(); SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); @@ -5877,7 +6392,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) { return Val->getAPIntValue().uge(OpSizeInBits); }; - if (matchUnaryPredicate(N1, MatchShiftTooBig)) + if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig)) return DAG.getUNDEF(VT); // fold (sra x, 0) -> x if (N1C && N1C->isNullValue()) @@ -5912,7 +6427,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return (c1 + c2).uge(OpSizeInBits); }; - if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT)); @@ -5923,7 +6438,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return (c1 + c2).ult(OpSizeInBits); }; - if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum); } @@ -6041,7 +6556,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) { return Val->getAPIntValue().uge(OpSizeInBits); }; - if (matchUnaryPredicate(N1, MatchShiftTooBig)) + if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig)) return DAG.getUNDEF(VT); // fold (srl x, 0) -> x if (N1C && N1C->isNullValue()) @@ -6064,7 +6579,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return (c1 + c2).uge(OpSizeInBits); }; - if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) return DAG.getConstant(0, SDLoc(N), VT); auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, @@ -6074,7 +6589,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return (c1 + c2).ult(OpSizeInBits); }; - if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { SDLoc DL(N); EVT ShiftVT = N1.getValueType(); SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, 
N0.getOperand(1)); @@ -6285,6 +6800,13 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { // fold (ctlz c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); + + // If the value is known never to be zero, switch to the undef version. + if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) { + if (DAG.isKnownNeverZero(N0)) + return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); + } + return SDValue(); } @@ -6305,6 +6827,13 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { // fold (cttz c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); + + // If the value is known never to be zero, switch to the undef version. + if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) { + if (DAG.isKnownNeverZero(N0)) + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); + } + return SDValue(); } @@ -6328,7 +6857,7 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { return SDValue(); } -/// \brief Generate Min/Max node +/// Generate Min/Max node static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, @@ -6443,9 +6972,9 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { // in another basic block or it could require searching a complicated // expression. if (CondVT.isInteger() && - TLI.getBooleanContents(false, true) == + TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) == TargetLowering::ZeroOrOneBooleanContent && - TLI.getBooleanContents(false, false) == + TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) == TargetLowering::ZeroOrOneBooleanContent && C1->isNullValue() && C2->isOne()) { SDValue NotCond = @@ -6574,15 +7103,10 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } } - // select (xor Cond, 1), X, Y -> select Cond, Y, X if (VT0 == MVT::i1) { - if (N0->getOpcode() == ISD::XOR) { - if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) { - SDValue Cond0 = N0->getOperand(0); - if (C->isOne()) - return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1); - } - } + // select (not Cond), N1, N2 -> select Cond, N2, N1 + if (isBitwiseNot(N0)) + return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1); } // fold selects based on a setcc into other things, such as min/max/abs @@ -6726,6 +7250,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { SDValue DataLo, DataHi; std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + SDValue Scale = MSC->getScale(); SDValue BasePtr = MSC->getBasePtr(); SDValue IndexLo, IndexHi; std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); @@ -6735,11 +7260,11 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, MSC->getAAInfo(), MSC->getRanges()); - SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo }; + SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale }; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), DL, OpsLo, MMO); - SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi}; + SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale }; Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), DL, OpsHi, MMO); @@ -6800,12 +7325,12 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, MST->isCompressingStore()); + unsigned HiOffset = LoMemVT.getStoreSize(); - MMO = 
DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), - MachineMemOperand::MOStore, HiMemVT.getStoreSize(), - SecondHalfAlignment, MST->getAAInfo(), - MST->getRanges()); + MMO = DAG.getMachineFunction().getMachineMemOperand( + MST->getPointerInfo().getWithOffset(HiOffset), + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, + MST->getAAInfo(), MST->getRanges()); Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, MST->isTruncatingStore(), @@ -6859,6 +7384,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + SDValue Scale = MGT->getScale(); SDValue BasePtr = MGT->getBasePtr(); SDValue Index = MGT->getIndex(); SDValue IndexLo, IndexHi; @@ -6869,13 +7395,13 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); - SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo }; + SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo, Scale }; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, - MMO); + MMO); - SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi}; + SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi, Scale }; Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, - MMO); + MMO); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); @@ -6949,11 +7475,12 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, MLD->isExpandingLoad()); + unsigned HiOffset = LoMemVT.getStoreSize(); - MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), - MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), - SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); + MMO = DAG.getMachineFunction().getMachineMemOperand( + MLD->getPointerInfo().getWithOffset(HiOffset), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, + MLD->getAAInfo(), MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); @@ -7071,6 +7598,36 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { AddToWorklist(Add.getNode()); return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); } + + // If this select has a condition (setcc) with narrower operands than the + // select, try to widen the compare to match the select width. + // TODO: This should be extended to handle any constant. + // TODO: This could be extended to handle non-loading patterns, but that + // requires thorough testing to avoid regressions. + if (isNullConstantOrNullSplatConstant(RHS)) { + EVT NarrowVT = LHS.getValueType(); + EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger(); + EVT SetCCVT = getSetCCResultType(LHS.getValueType()); + unsigned SetCCWidth = SetCCVT.getScalarSizeInBits(); + unsigned WideWidth = WideVT.getScalarSizeInBits(); + bool IsSigned = isSignedIntSetCC(CC); + auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() && + SetCCWidth != 1 && SetCCWidth < WideWidth && + TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) && + TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) { + // Both compare operands can be widened for free. 
The LHS can use an + // extended load, and the RHS is a constant: + // vselect (ext (setcc load(X), C)), N1, N2 --> + // vselect (setcc extload(X), C'), N1, N2 + auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS); + SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS); + EVT WideSetCCVT = getSetCCResultType(WideVT); + SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC); + return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2); + } + } } if (SimplifySelectOps(N, N1, N2)) @@ -7142,22 +7699,33 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { } SDValue DAGCombiner::visitSETCC(SDNode *N) { - return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), - cast<CondCodeSDNode>(N->getOperand(2))->get(), - SDLoc(N)); -} + // setcc is very commonly used as an argument to brcond. This pattern + // also lend itself to numerous combines and, as a result, it is desired + // we keep the argument to a brcond as a setcc as much as possible. + bool PreferSetCC = + N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND; -SDValue DAGCombiner::visitSETCCE(SDNode *N) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - SDValue Carry = N->getOperand(2); - SDValue Cond = N->getOperand(3); + SDValue Combined = SimplifySetCC( + N->getValueType(0), N->getOperand(0), N->getOperand(1), + cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC); - // If Carry is false, fold to a regular SETCC. - if (Carry.getOpcode() == ISD::CARRY_FALSE) - return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond); + if (!Combined) + return SDValue(); - return SDValue(); + // If we prefer to have a setcc, and we don't, we'll try our best to + // recreate one using rebuildSetCC. + if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) { + SDValue NewSetCC = rebuildSetCC(Combined); + + // We don't have anything interesting to combine to. + if (NewSetCC.getNode() == N) + return SDValue(); + + if (NewSetCC) + return NewSetCC; + } + + return Combined; } SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) { @@ -7237,12 +7805,12 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" // transformation. Returns true if extension are possible and the above // mentioned transformation is profitable. -static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, +static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl<SDNode *> &ExtendNodes, const TargetLowering &TLI) { bool HasCopyToRegUses = false; - bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); + bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType()); for (SDNode::use_iterator UI = N0.getNode()->use_begin(), UE = N0.getNode()->use_end(); UI != UE; ++UI) { @@ -7298,16 +7866,16 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, } void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, - SDValue Trunc, SDValue ExtLoad, - const SDLoc &DL, ISD::NodeType ExtType) { + SDValue OrigLoad, SDValue ExtLoad, + ISD::NodeType ExtType) { // Extend SetCC uses if necessary. 
- for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { - SDNode *SetCC = SetCCs[i]; + SDLoc DL(ExtLoad); + for (SDNode *SetCC : SetCCs) { SmallVector<SDValue, 4> Ops; for (unsigned j = 0; j != 2; ++j) { SDValue SOp = SetCC->getOperand(j); - if (SOp == Trunc) + if (SOp == OrigLoad) Ops.push_back(ExtLoad); else Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); @@ -7356,7 +7924,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { return SDValue(); SmallVector<SDNode *, 4> SetCCs; - if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI)) + if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI)) return SDValue(); ISD::LoadExtType ExtType = @@ -7387,7 +7955,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { const unsigned Align = MinAlign(LN0->getAlignment(), Offset); SDValue SplitLoad = DAG.getExtLoad( - ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr, + ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr, LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); @@ -7410,12 +7978,82 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { // with a truncate of the concatenated sextloaded vectors. SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue); + ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode()); CombineTo(N0.getNode(), Trunc, NewChain); - ExtendSetCCUses(SetCCs, Trunc, NewValue, DL, - (ISD::NodeType)N->getOpcode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! } +// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) -> +// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst)) +SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) { + assert(N->getOpcode() == ISD::ZERO_EXTEND); + EVT VT = N->getValueType(0); + + // and/or/xor + SDValue N0 = N->getOperand(0); + if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) || + N0.getOperand(1).getOpcode() != ISD::Constant || + (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT))) + return SDValue(); + + // shl/shr + SDValue N1 = N0->getOperand(0); + if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) || + N1.getOperand(1).getOpcode() != ISD::Constant || + (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT))) + return SDValue(); + + // load + if (!isa<LoadSDNode>(N1.getOperand(0))) + return SDValue(); + LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0)); + EVT MemVT = Load->getMemoryVT(); + if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) || + Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed()) + return SDValue(); + + + // If the shift op is SHL, the logic op must be AND, otherwise the result + // will be wrong. + if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND) + return SDValue(); + + if (!N0.hasOneUse() || !N1.hasOneUse()) + return SDValue(); + + SmallVector<SDNode*, 4> SetCCs; + if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0), + ISD::ZERO_EXTEND, SetCCs, TLI)) + return SDValue(); + + // Actually do the transformation. 
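
For the srl/and case, the new zext fold is the scalar identity zext(and(srl(x, c), m)) == and(srl(zext(x), c), zext(m)); for shl the logic op must be an and whose mask discards the bits shifted past the narrow width, which is why the code bails otherwise. Standalone check of the srl case for an 8-bit load widened to 32 bits:

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t Mask = 0x1F;
  for (unsigned V = 0; V < 256; ++V) {
    uint8_t X = static_cast<uint8_t>(V);
    for (unsigned C = 0; C < 8; ++C) {
      uint32_t Narrow = static_cast<uint32_t>(
          static_cast<uint8_t>(static_cast<uint8_t>(X >> C) & Mask)); // zext after
      uint32_t Wide = (static_cast<uint32_t>(X) >> C) &
                      static_cast<uint32_t>(Mask);                    // zext first
      assert(Narrow == Wide);
    }
  }
  return 0;
}
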
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT, + Load->getChain(), Load->getBasePtr(), + Load->getMemoryVT(), Load->getMemOperand()); + + SDLoc DL1(N1); + SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad, + N1.getOperand(1)); + + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.zext(VT.getSizeInBits()); + SDLoc DL0(N0); + SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift, + DAG.getConstant(Mask, DL0, VT)); + + ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND); + CombineTo(N, And); + if (SDValue(Load, 0).hasOneUse()) { + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1)); + } else { + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load), + Load->getValueType(0), ExtLoad); + CombineTo(Load, Trunc, ExtLoad.getValue(1)); + } + return SDValue(N,0); // Return N so it doesn't get rechecked! +} + /// If we're narrowing or widening the result of a vector select and the final /// size is the same size as a setcc (compare) feeding the select, then try to /// apply the cast operation to the select's operands because matching vector @@ -7461,6 +8099,106 @@ SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) { return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB); } +// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) +// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) +static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, + const TargetLowering &TLI, EVT VT, + bool LegalOperations, SDNode *N, + SDValue N0, ISD::LoadExtType ExtLoadType) { + SDNode *N0Node = N0.getNode(); + bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node) + : ISD::isZEXTLoad(N0Node); + if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) || + !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse()) + return {}; + + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + if ((LegalOperations || LN0->isVolatile()) && + !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)) + return {}; + + SDValue ExtLoad = + DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(), + LN0->getBasePtr(), MemVT, LN0->getMemOperand()); + Combiner.CombineTo(N, ExtLoad); + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! +} + +// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x))) +// Only generate vector extloads when 1) they're legal, and 2) they are +// deemed desirable by the target. 
+static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, + const TargetLowering &TLI, EVT VT, + bool LegalOperations, SDNode *N, SDValue N0, + ISD::LoadExtType ExtLoadType, + ISD::NodeType ExtOpc) { + if (!ISD::isNON_EXTLoad(N0.getNode()) || + !ISD::isUNINDEXEDLoad(N0.getNode()) || + ((LegalOperations || VT.isVector() || + cast<LoadSDNode>(N0)->isVolatile()) && + !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))) + return {}; + + bool DoXform = true; + SmallVector<SDNode *, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI); + if (VT.isVector()) + DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); + if (!DoXform) + return {}; + + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(), + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); + Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc); + // If the load value is used only by N, replace it via CombineTo N. + bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); + Combiner.CombineTo(N, ExtLoad); + if (NoReplaceTrunc) { + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + } else { + SDValue Trunc = + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); + Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! +} + +static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, + bool LegalOperations) { + assert((N->getOpcode() == ISD::SIGN_EXTEND || + N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext"); + + SDValue SetCC = N->getOperand(0); + if (LegalOperations || SetCC.getOpcode() != ISD::SETCC || + !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1) + return SDValue(); + + SDValue X = SetCC.getOperand(0); + SDValue Ones = SetCC.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get(); + EVT VT = N->getValueType(0); + EVT XVT = X.getValueType(); + // setge X, C is canonicalized to setgt, so we do not need to match that + // pattern. The setlt sibling is folded in SimplifySelectCC() because it does + // not require the 'not' op. + if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) { + // Invert and smear/shift the sign bit: + // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1) + // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1) + SDLoc DL(N); + SDValue NotX = DAG.getNOT(DL, X, VT); + SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT); + auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL; + return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount); + } + return SDValue(); +} + SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7525,62 +8263,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } - // fold (sext (load x)) -> (sext (truncate (sextload x))) - // Only generate vector extloads when 1) they're legal, and 2) they are - // deemed desirable by the target. 
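
foldExtendedSignBitTest above turns the compare-against-minus-one into an inverted sign-bit extract. Standalone check of both the sext and zext forms (assumes arithmetic right shift for the signed case):

#include <cassert>
#include <cstdint>

int main() {
  const int32_t Xs[] = {0, 1, -1, 42, -42, INT32_MAX, INT32_MIN};
  for (int32_t X : Xs) {
    int32_t Sext = (X > -1) ? -1 : 0;  // sext i1 (setgt X, -1)
    uint32_t Zext = (X > -1) ? 1u : 0u; // zext i1 (setgt X, -1)
    assert(Sext == ((~X) >> 31));                         // sra (not X), 31
    assert(Zext == (static_cast<uint32_t>(~X) >> 31));    // srl (not X), 31
  }
  return 0;
}
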
- if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && - ((!LegalOperations && !VT.isVector() && - !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) { - bool DoXform = true; - SmallVector<SDNode*, 4> SetCCs; - if (!N0.hasOneUse()) - DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); - if (VT.isVector()) - DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); - if (DoXform) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(), - LN0->getBasePtr(), N0.getValueType(), - LN0->getMemOperand()); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), - N0.getValueType(), ExtLoad); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); - // If the load value is used only by N, replace it via CombineTo N. - bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); - CombineTo(N, ExtLoad); - if (NoReplaceTrunc) - DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); - else - CombineTo(LN0, Trunc, ExtLoad.getValue(1)); - return SDValue(N, 0); - } - } + // Try to simplify (sext (load x)). + if (SDValue foldedExt = + tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0, + ISD::SEXTLOAD, ISD::SIGN_EXTEND)) + return foldedExt; // fold (sext (load x)) to multiple smaller sextloads. // Only on illegal but splittable vectors. if (SDValue ExtLoad = CombineExtLoad(N)) return ExtLoad; - // fold (sext (sextload x)) -> (sext (truncate (sextload x))) - // fold (sext ( extload x)) -> (sext (truncate (sextload x))) - if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && - ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT MemVT = LN0->getMemoryVT(); - if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(), - LN0->getBasePtr(), MemVT, - LN0->getMemOperand()); - CombineTo(N, ExtLoad); - CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, SDLoc(N0), - N0.getValueType(), ExtLoad), - ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } + // Try to simplify (sext (sextload x)). 
+ if (SDValue foldedExt = tryToFoldExtOfExtload( + DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD)) + return foldedExt; // fold (sext (and/or/xor (load x), cst)) -> // (and/or/xor (sextload x), (sext cst)) @@ -7588,30 +8285,26 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { N0.getOpcode() == ISD::XOR) && isa<LoadSDNode>(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::Constant && - TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); - if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) { - bool DoXform = true; + LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0)); + EVT MemVT = LN00->getMemoryVT(); + if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) && + LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) { SmallVector<SDNode*, 4> SetCCs; - if (!N0.hasOneUse()) - DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, - SetCCs, TLI); + bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0), + ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT, - LN0->getChain(), LN0->getBasePtr(), - LN0->getMemoryVT(), - LN0->getMemOperand()); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT, + LN00->getChain(), LN00->getBasePtr(), + LN00->getMemoryVT(), + LN00->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.sext(VT.getSizeInBits()); SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, ExtLoad, DAG.getConstant(Mask, DL, VT)); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, - SDLoc(N0.getOperand(0)), - N0.getOperand(0).getValueType(), ExtLoad); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); + ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND); bool NoReplaceTruncAnd = !N0.hasOneUse(); - bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); + bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse(); CombineTo(N, And); // If N0 has multiple uses, change other uses as well. if (NoReplaceTruncAnd) { @@ -7619,15 +8312,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And); CombineTo(N0.getNode(), TruncAnd); } - if (NoReplaceTrunc) - DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); - else - CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + if (NoReplaceTrunc) { + DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1)); + } else { + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00), + LN00->getValueType(0), ExtLoad); + CombineTo(LN00, Trunc, ExtLoad.getValue(1)); + } return SDValue(N,0); // Return N so it doesn't get rechecked! } } } + if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations)) + return V; + if (N0.getOpcode() == ISD::SETCC) { SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); @@ -7674,8 +8373,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // If the type of the setcc is larger (say, i8) then the value of the high // bit depends on getBooleanContents(), so ask TLI for a real "true" value // of the appropriate width. - SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT) - : TLI.getConstTrueVal(DAG, VT, DL); + SDValue ExtTrueVal = (SetCCWidth == 1) + ? 
DAG.getAllOnesConstant(DL, VT) + : DAG.getBoolConstant(true, DL, VT, N00VT); SDValue Zero = DAG.getConstant(0, DL, VT); if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true)) @@ -7792,13 +8492,16 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // Try to mask before the extension to avoid having to generate a larger mask, // possibly over several sub-vectors. - if (SrcVT.bitsLT(VT)) { + if (SrcVT.bitsLT(VT) && VT.isVector()) { if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) && TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) { SDValue Op = N0.getOperand(0); Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); AddToWorklist(Op.getNode()); - return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); + SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT); + // Transfer the debug info; the new node is equivalent to N0. + DAG.transferDbgValues(N0, ZExtOrTrunc); + return ZExtOrTrunc; } } @@ -7830,39 +8533,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { X, DAG.getConstant(Mask, DL, VT)); } - // fold (zext (load x)) -> (zext (truncate (zextload x))) - // Only generate vector extloads when 1) they're legal, and 2) they are - // deemed desirable by the target. - if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && - ((!LegalOperations && !VT.isVector() && - !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) { - bool DoXform = true; - SmallVector<SDNode*, 4> SetCCs; - if (!N0.hasOneUse()) - DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); - if (VT.isVector()) - DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); - if (DoXform) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, - LN0->getChain(), - LN0->getBasePtr(), N0.getValueType(), - LN0->getMemOperand()); - - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), - N0.getValueType(), ExtLoad); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND); - // If the load value is used only by N, replace it via CombineTo N. - bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); - CombineTo(N, ExtLoad); - if (NoReplaceTrunc) - DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); - else - CombineTo(LN0, Trunc, ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } + // Try to simplify (zext (load x)). + if (SDValue foldedExt = + tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0, + ISD::ZEXTLOAD, ISD::ZERO_EXTEND)) + return foldedExt; // fold (zext (load x)) to multiple smaller zextloads. // Only on illegal but splittable vectors. 
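A minimal standalone sketch (plain C++, not SelectionDAG code) of the scalar identity the zero-extend combines above rely on: zero-extension commutes with an unsigned mask and with a logical shift, so masking before the extension (or pulling the extension through a zextload) cannot change the value. The 16-bit to 32-bit widths and the 0x0FF0 mask are arbitrary illustration values.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t i = 0; i < 0x10000; ++i) {
    uint16_t X = static_cast<uint16_t>(i);
    const uint16_t M = 0x0FF0; // arbitrary mask in the narrow type
    // Mask in the narrow type, then zero-extend ...
    uint32_t MaskThenExt = static_cast<uint32_t>(static_cast<uint16_t>(X & M));
    // ... equals zero-extending first and masking with the extended mask.
    uint32_t ExtThenMask = static_cast<uint32_t>(X) & static_cast<uint32_t>(M);
    assert(MaskThenExt == ExtThenMask);
    // Likewise zext(srl(X, 4)) == srl(zext(X), 4) for a logical shift.
    assert(static_cast<uint32_t>(static_cast<uint16_t>(X >> 4)) ==
           (static_cast<uint32_t>(X) >> 4));
  }
  return 0;
}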
@@ -7877,10 +8552,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getOpcode() == ISD::XOR) && isa<LoadSDNode>(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::Constant && - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); - if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { + LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0)); + EVT MemVT = LN00->getMemoryVT(); + if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) && + LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) { @@ -7888,29 +8564,26 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { auto *AndC = cast<ConstantSDNode>(N0.getOperand(1)); EVT LoadResultTy = AndC->getValueType(0); EVT ExtVT; - if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT)) + if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT)) DoXform = false; } - if (DoXform) - DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), - ISD::ZERO_EXTEND, SetCCs, TLI); } + if (DoXform) + DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0), + ISD::ZERO_EXTEND, SetCCs, TLI); if (DoXform) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, - LN0->getChain(), LN0->getBasePtr(), - LN0->getMemoryVT(), - LN0->getMemOperand()); + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT, + LN00->getChain(), LN00->getBasePtr(), + LN00->getMemoryVT(), + LN00->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); SDLoc DL(N); SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, ExtLoad, DAG.getConstant(Mask, DL, VT)); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, - SDLoc(N0.getOperand(0)), - N0.getOperand(0).getValueType(), ExtLoad); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND); + ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND); bool NoReplaceTruncAnd = !N0.hasOneUse(); - bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); + bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse(); CombineTo(N, And); // If N0 has multiple uses, change other uses as well. if (NoReplaceTruncAnd) { @@ -7918,35 +8591,30 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And); CombineTo(N0.getNode(), TruncAnd); } - if (NoReplaceTrunc) - DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); - else - CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + if (NoReplaceTrunc) { + DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1)); + } else { + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00), + LN00->getValueType(0), ExtLoad); + CombineTo(LN00, Trunc, ExtLoad.getValue(1)); + } return SDValue(N,0); // Return N so it doesn't get rechecked! 
} } } - // fold (zext (zextload x)) -> (zext (truncate (zextload x))) - // fold (zext ( extload x)) -> (zext (truncate (zextload x))) - if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && - ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT MemVT = LN0->getMemoryVT(); - if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, - LN0->getChain(), - LN0->getBasePtr(), MemVT, - LN0->getMemOperand()); - CombineTo(N, ExtLoad); - CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), - ExtLoad), - ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } + // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) -> + // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst)) + if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N)) + return ZExtLoad; + + // Try to simplify (zext (zextload x)). + if (SDValue foldedExt = tryToFoldExtOfExtload( + DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD)) + return foldedExt; + + if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations)) + return V; if (N0.getOpcode() == ISD::SETCC) { // Only do this before legalize for now. @@ -8084,24 +8752,25 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) - DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); + DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, + TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), - N0.getValueType(), ExtLoad); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), - ISD::ANY_EXTEND); + ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND); // If the load value is used only by N, replace it via CombineTo N. bool NoReplaceTrunc = N0.hasOneUse(); CombineTo(N, ExtLoad); - if (NoReplaceTrunc) + if (NoReplaceTrunc) { DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); - else + } else { + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), + N0.getValueType(), ExtLoad); CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + } return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} } @@ -8109,9 +8778,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext (zextload x)) -> (aext (truncate (zextload x))) // fold (aext (sextload x)) -> (aext (truncate (sextload x))) // fold (aext ( extload x)) -> (aext (truncate (extload x))) - if (N0.getOpcode() == ISD::LOAD && - !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && - N0.hasOneUse()) { + if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) && + ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); ISD::LoadExtType ExtType = LN0->getExtensionType(); EVT MemVT = LN0->getMemoryVT(); @@ -8120,10 +8788,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); - CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, SDLoc(N0), - N0.getValueType(), ExtLoad), - ExtLoad.getValue(1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -8263,8 +8928,9 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned ShAmt = 0; if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { - if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - ShAmt = N01->getZExtValue(); + SDValue SRL = N0; + if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) { + ShAmt = ConstShift->getZExtValue(); unsigned EVTBits = ExtVT.getSizeInBits(); // Is the shift amount a multiple of size of VT? if ((ShAmt & (EVTBits-1)) == 0) { @@ -8277,17 +8943,36 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // At this point, we must have a load or else we can't do the transform. if (!isa<LoadSDNode>(N0)) return SDValue(); + auto *LN0 = cast<LoadSDNode>(N0); + // Because a SRL must be assumed to *need* to zero-extend the high bits // (as opposed to anyext the high bits), we can't combine the zextload // lowering of SRL and an sextload. - if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD) + if (LN0->getExtensionType() == ISD::SEXTLOAD) return SDValue(); // If the shift amount is larger than the input type then we're not // accessing any of the loaded bytes. If the load was a zextload/extload // then the result of the shift+trunc is zero/undef (handled elsewhere). - if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits()) + if (ShAmt >= LN0->getMemoryVT().getSizeInBits()) return SDValue(); + + // If the SRL is only used by a masking AND, we may be able to adjust + // the ExtVT to make the AND redundant. + SDNode *Mask = *(SRL->use_begin()); + if (Mask->getOpcode() == ISD::AND && + isa<ConstantSDNode>(Mask->getOperand(1))) { + const APInt &ShiftMask = + cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue(); + if (ShiftMask.isMask()) { + EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), + ShiftMask.countTrailingOnes()); + // If the mask is smaller, recompute the type. 
+ if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) && + TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT)) + ExtVT = MaskedVT; + } + } } } @@ -8307,7 +8992,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { return SDValue(); LoadSDNode *LN0 = cast<LoadSDNode>(N0); - if (!isLegalNarrowLoad(LN0, ExtType, ExtVT, ShAmt)) + if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) return SDValue(); // For big endian targets, we need to adjust the offset to the pointer to @@ -8403,7 +9088,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); } - // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x) + // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x) if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG || N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG || N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) && @@ -8777,6 +9462,22 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2)); } + // fold (truncate (extract_subvector(ext x))) -> + // (extract_subvector x) + // TODO: This can be generalized to cover cases where the truncate and extract + // do not fully cancel each other out. + if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::SIGN_EXTEND || + N00.getOpcode() == ISD::ZERO_EXTEND || + N00.getOpcode() == ISD::ANY_EXTEND) { + if (N00.getOperand(0)->getValueType(0).getVectorElementType() == + VT.getVectorElementType()) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT, + N00.getOperand(0), N0.getOperand(1)); + } + } + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) return NewVSel; @@ -8897,17 +9598,17 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { } // If the input is a constant, let getNode fold it. - if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { - // If we can't allow illegal operations, we need to check that this is just - // a fp -> int or int -> conversion and that the resulting operation will - // be legal. - if (!LegalOperations || - (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() && - TLI.isOperationLegal(ISD::ConstantFP, VT)) || - (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && - TLI.isOperationLegal(ISD::Constant, VT))) - return DAG.getBitcast(VT, N0); - } + // We always need to check that this is just a fp -> int or int -> conversion + // otherwise we will get back N which will confuse the caller into thinking + // we used CombineTo. This can block target combines from running. If we can't + // allowed legal operations, we need to ensure the resulting operation will be + // legal. + // TODO: Maybe we should check that the return value isn't N explicitly? 
+ if ((isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() && + (!LegalOperations || TLI.isOperationLegal(ISD::ConstantFP, VT))) || + (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && + (!LegalOperations || TLI.isOperationLegal(ISD::Constant, VT)))) + return DAG.getBitcast(VT, N0); // (conv (conv x, t1), t2) -> (conv x, t2) if (N0.getOpcode() == ISD::BITCAST) @@ -9253,7 +9954,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { static bool isContractable(SDNode *N) { SDNodeFlags F = N->getFlags(); - return F.hasAllowContract() || F.hasUnsafeAlgebra(); + return F.hasAllowContract() || F.hasAllowReassociation(); } /// Try to perform FMA combining on a given FADD node. @@ -9277,8 +9978,10 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (!HasFMAD && !HasFMA) return SDValue(); + SDNodeFlags Flags = N->getFlags(); + bool CanFuse = Options.UnsafeFPMath || isContractable(N); bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath || HasFMAD); + CanFuse || HasFMAD); // If the addition is not contractable, do not combine. if (!AllowFusionGlobally && !isContractable(N)) return SDValue(); @@ -9308,14 +10011,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, - N0.getOperand(0), N0.getOperand(1), N1); + N0.getOperand(0), N0.getOperand(1), N1, Flags); } // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, - N1.getOperand(0), N1.getOperand(1), N0); + N1.getOperand(0), N1.getOperand(1), N0, Flags); } // Look through FP_EXTEND nodes to do more combining. @@ -9329,7 +10032,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(1)), N1); + N00.getOperand(1)), N1, Flags); } } @@ -9343,16 +10046,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(1)), N0); + N10.getOperand(1)), N0, Flags); } } // More folding opportunities when target permits. if (Aggressive) { // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) - // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF - // are currently only supported on binary nodes. - if (Options.UnsafeFPMath && + if (CanFuse && N0.getOpcode() == PreferredFusedOpcode && N0.getOperand(2).getOpcode() == ISD::FMUL && N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { @@ -9361,13 +10062,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(2).getOperand(0), N0.getOperand(2).getOperand(1), - N1)); + N1, Flags), Flags); } // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) - // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF - // are currently only supported on binary nodes. 
- if (Options.UnsafeFPMath && + if (CanFuse && N1->getOpcode() == PreferredFusedOpcode && N1.getOperand(2).getOpcode() == ISD::FMUL && N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) { @@ -9376,19 +10075,20 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(2).getOperand(0), N1.getOperand(2).getOperand(1), - N0)); + N0, Flags), Flags); } // fold (fadd (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y, (fma (fpext u), (fpext v), z)) auto FoldFAddFMAFPExtFMul = [&] ( - SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z, + SDNodeFlags Flags) { return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, U), DAG.getNode(ISD::FP_EXTEND, SL, VT, V), - Z)); + Z, Flags), Flags); }; if (N0.getOpcode() == PreferredFusedOpcode) { SDValue N02 = N0.getOperand(2); @@ -9398,7 +10098,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), N020.getOperand(0), N020.getOperand(1), - N1); + N1, Flags); } } } @@ -9409,14 +10109,15 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // operation into two double-precision operations, which might not be // interesting for all targets, especially GPUs. auto FoldFAddFPExtFMAFMul = [&] ( - SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z, + SDNodeFlags Flags) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X), DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, U), DAG.getNode(ISD::FP_EXTEND, SL, VT, V), - Z)); + Z, Flags), Flags); }; if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); @@ -9426,7 +10127,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), N002.getOperand(0), N002.getOperand(1), - N1); + N1, Flags); } } } @@ -9441,7 +10142,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) { return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), N120.getOperand(0), N120.getOperand(1), - N0); + N0, Flags); } } } @@ -9459,7 +10160,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), N102.getOperand(0), N102.getOperand(1), - N0); + N0, Flags); } } } @@ -9488,8 +10189,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (!HasFMAD && !HasFMA) return SDValue(); + const SDNodeFlags Flags = N->getFlags(); + bool CanFuse = Options.UnsafeFPMath || isContractable(N); bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath || HasFMAD); + CanFuse || HasFMAD); + // If the subtraction is not contractable, do not combine. 
if (!AllowFusionGlobally && !isContractable(N)) return SDValue(); @@ -9514,16 +10218,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FNEG, SL, VT, N1)); + DAG.getNode(ISD::FNEG, SL, VT, N1), Flags); } // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. - if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) + if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), - N1.getOperand(1), N0); + N1.getOperand(1), N0, Flags); + } // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) && @@ -9532,7 +10237,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { SDValue N01 = N0.getOperand(0).getOperand(1); return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, N00), N01, - DAG.getNode(ISD::FNEG, SL, VT, N1)); + DAG.getNode(ISD::FNEG, SL, VT, N1), Flags); } // Look through FP_EXTEND nodes to do more combining. @@ -9548,7 +10253,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N00.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), - DAG.getNode(ISD::FNEG, SL, VT, N1)); + DAG.getNode(ISD::FNEG, SL, VT, N1), Flags); } } @@ -9565,7 +10270,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N10.getOperand(0))), DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), - N0); + N0, Flags); } } @@ -9587,7 +10292,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N000.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)), - N1)); + N1, Flags)); } } } @@ -9610,7 +10315,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N000.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)), - N1)); + N1, Flags)); } } } @@ -9619,9 +10324,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (Aggressive) { // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) - // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF - // are currently only supported on binary nodes. - if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode && + if (CanFuse && N0.getOpcode() == PreferredFusedOpcode && isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -9630,14 +10333,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N0.getOperand(2).getOperand(0), N0.getOperand(2).getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, - N1))); + N1), Flags), Flags); } // fold (fsub x, (fma y, z, (fmul u, v))) // -> (fma (fneg y), z, (fma (fneg u), v, x)) - // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF - // are currently only supported on binary nodes. 
- if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode && + if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && isContractableFMUL(N1.getOperand(2))) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); @@ -9647,8 +10348,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N1.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, N20), - - N21, N0)); + N21, N0, Flags), Flags); } @@ -9668,7 +10368,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)), DAG.getNode(ISD::FNEG, SL, VT, - N1))); + N1), Flags), Flags); } } } @@ -9696,7 +10396,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)), DAG.getNode(ISD::FNEG, SL, VT, - N1))); + N1), Flags), Flags); } } } @@ -9719,7 +10419,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { VT, N1200)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), - N0)); + N0, Flags), Flags); } } @@ -9750,7 +10450,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { VT, N1020)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), - N0)); + N0, Flags), Flags); } } } @@ -9766,6 +10466,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc SL(N); + const SDNodeFlags Flags = N->getFlags(); assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation"); @@ -9797,52 +10498,54 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y) // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y)) - auto FuseFADD = [&](SDValue X, SDValue Y) { + auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) { if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) { auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); if (XC1 && XC1->isExactlyValue(+1.0)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + Y, Flags); if (XC1 && XC1->isExactlyValue(-1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, - DAG.getNode(ISD::FNEG, SL, VT, Y)); + DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); } return SDValue(); }; - if (SDValue FMA = FuseFADD(N0, N1)) + if (SDValue FMA = FuseFADD(N0, N1, Flags)) return FMA; - if (SDValue FMA = FuseFADD(N1, N0)) + if (SDValue FMA = FuseFADD(N1, N0, Flags)) return FMA; // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y) // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y)) // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y)) // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y) - auto FuseFSUB = [&](SDValue X, SDValue Y) { + auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) { if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) { auto XC0 = isConstOrConstSplatFP(X.getOperand(0)); if (XC0 && XC0->isExactlyValue(+1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, - Y); + Y, Flags); if (XC0 && XC0->isExactlyValue(-1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, - DAG.getNode(ISD::FNEG, SL, VT, Y)); + DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); if (XC1 && XC1->isExactlyValue(+1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, - 
DAG.getNode(ISD::FNEG, SL, VT, Y)); + DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); if (XC1 && XC1->isExactlyValue(-1.0)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + Y, Flags); } return SDValue(); }; - if (SDValue FMA = FuseFSUB(N0, N1)) + if (SDValue FMA = FuseFSUB(N0, N1, Flags)) return FMA; - if (SDValue FMA = FuseFSUB(N1, N0)) + if (SDValue FMA = FuseFSUB(N1, N0, Flags)) return FMA; return SDValue(); @@ -9904,35 +10607,42 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags); } - // FIXME: Auto-upgrade the target/function-level option. - if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) { - // fold (fadd A, 0) -> A - if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) - if (N1C->isZero()) - return N0; + ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1); + if (N1C && N1C->isZero()) { + if (N1C->isNegative() || Options.UnsafeFPMath || + Flags.hasNoSignedZeros()) { + // fold (fadd A, 0) -> A + return N0; + } } - // If 'unsafe math' is enabled, fold lots of things. - if (Options.UnsafeFPMath) { - // No FP constant should be created after legalization as Instruction - // Selection pass has a hard time dealing with FP constants. - bool AllowNewConst = (Level < AfterLegalizeDAG); - - // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) - if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && - isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) - return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), - DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, - Flags), - Flags); + // No FP constant should be created after legalization as Instruction + // Selection pass has a hard time dealing with FP constants. + bool AllowNewConst = (Level < AfterLegalizeDAG); + // If 'unsafe math' or nnan is enabled, fold lots of things. + if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) { // If allowed, fold (fadd (fneg x), x) -> 0.0 - if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) + if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) return DAG.getConstantFP(0.0, DL, VT); // If allowed, fold (fadd x, (fneg x)) -> 0.0 - if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) + if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) return DAG.getConstantFP(0.0, DL, VT); + } + + // If 'unsafe math' or reassoc and nsz, fold lots of things. + // TODO: break out portions of the transformations below for which Unsafe is + // considered and which do not require both nsz and reassoc + if ((Options.UnsafeFPMath || + (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && + AllowNewConst) { + // fadd (fadd x, c1), c2 -> fadd x, c1 + c2 + if (N1CFP && N0.getOpcode() == ISD::FADD && + isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { + SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags); + return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags); + } // We can fold chains of FADD's of the same value into multiplications. 
// This transform is not safe in general because we are reducing the number @@ -9980,7 +10690,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } } - if (N0.getOpcode() == ISD::FADD && AllowNewConst) { + if (N0.getOpcode() == ISD::FADD) { bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && @@ -9990,7 +10700,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } } - if (N1.getOpcode() == ISD::FADD && AllowNewConst) { + if (N1.getOpcode() == ISD::FADD) { bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && @@ -10001,8 +10711,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) - if (AllowNewConst && - N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && + if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { @@ -10042,15 +10751,23 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) - return DAG.getNode(ISD::FADD, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations), Flags); + // (fsub A, 0) -> A + if (N1CFP && N1CFP->isZero()) { + if (!N1CFP->isNegative() || Options.UnsafeFPMath || + Flags.hasNoSignedZeros()) { + return N0; + } + } - // FIXME: Auto-upgrade the target/function-level option. - if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) { - // (fsub 0, B) -> -B - if (N0CFP && N0CFP->isZero()) { + if (N0 == N1) { + // (fsub x, x) -> 0.0 + if (Options.UnsafeFPMath || Flags.hasNoNaNs()) + return DAG.getConstantFP(0.0f, DL, VT); + } + + // (fsub 0, B) -> -B + if (N0CFP && N0CFP->isZero()) { + if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) @@ -10058,16 +10775,13 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } } + // fold (fsub A, (fneg B)) -> (fadd A, B) + if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) + return DAG.getNode(ISD::FADD, DL, VT, N0, + GetNegatedExpression(N1, DAG, LegalOperations), Flags); + // If 'unsafe math' is enabled, fold lots of things. if (Options.UnsafeFPMath) { - // (fsub A, 0) -> A - if (N1CFP && N1CFP->isZero()) - return N0; - - // (fsub x, x) -> 0.0 - if (N0 == N1) - return DAG.getConstantFP(0.0f, DL, VT); - // (fsub x, (fadd x, y)) -> (fneg y) // (fsub x, (fadd y, x)) -> (fneg y) if (N1.getOpcode() == ISD::FADD) { @@ -10124,12 +10838,15 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - if (Options.UnsafeFPMath) { + if (Options.UnsafeFPMath || + (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) { // fold (fmul A, 0) -> 0 if (N1CFP && N1CFP->isZero()) return N1; + } - // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) + if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) { + // fmul (fmul X, C1), C2 -> fmul X, C1 * C2 if (N0.getOpcode() == ISD::FMUL) { // Fold scalars or any vector constants (not just splats). 
// This fold is done in general by InstCombine, but extra fmul insts @@ -10153,13 +10870,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { } } - // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) - // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs - // during an early run of DAGCombiner can prevent folding with fmuls - // inserted during lowering. - if (N0.getOpcode() == ISD::FADD && - (N0.getOperand(0) == N0.getOperand(1)) && - N0.hasOneUse()) { + // Match a special-case: we convert X * 2.0 into fadd. + // fmul (fadd X, X), C -> fmul X, 2.0 * C + if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() && + N0.getOperand(0) == N0.getOperand(1)) { const SDValue Two = DAG.getConstantFP(2.0, DL, VT); SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags); return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags); @@ -10253,6 +10967,10 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; + // FMA nodes have flags that propagate to the created nodes. + const SDNodeFlags Flags = N->getFlags(); + bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N); + // Constant fold FMA. if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) && @@ -10260,7 +10978,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2); } - if (Options.UnsafeFPMath) { + if (UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) return N2; if (N1CFP && N1CFP->isZero()) @@ -10277,12 +10995,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); - // TODO: FMA nodes should have flags that propagate to the created nodes. - // For now, create a Flags object for use with all unsafe math transforms. - SDNodeFlags Flags; - Flags.setUnsafeAlgebra(true); - - if (Options.UnsafeFPMath) { + if (UnsafeFPMath) { // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && isConstantFPBuildVectorOrConstantFP(N1) && @@ -10328,7 +11041,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } } - if (Options.UnsafeFPMath) { + if (UnsafeFPMath) { // (fma x, c, x) -> (fmul x, (c+1)) if (N1CFP && N0 == N2) { return DAG.getNode(ISD::FMUL, DL, VT, N0, @@ -10435,7 +11148,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - if (Options.UnsafeFPMath) { + if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) { // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. if (N1CFP) { // Compute the reciprocal 1.0 / c2. @@ -10544,17 +11257,16 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { } SDValue DAGCombiner::visitFSQRT(SDNode *N) { - if (!DAG.getTarget().Options.UnsafeFPMath) + SDNodeFlags Flags = N->getFlags(); + if (!DAG.getTarget().Options.UnsafeFPMath && + !Flags.hasApproximateFuncs()) return SDValue(); SDValue N0 = N->getOperand(0); if (TLI.isFsqrtCheap(N0, DAG)) return SDValue(); - // TODO: FSQRT nodes should have flags that propagate to the created nodes. - // For now, create a Flags object for use with all unsafe math transforms. - SDNodeFlags Flags; - Flags.setUnsafeAlgebra(true); + // FSQRT nodes have flags that propagate to the created nodes. 
return buildSqrtEstimate(N0, Flags); } @@ -10622,6 +11334,41 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { return SDValue(); } +static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, + const TargetLowering &TLI) { + // This optimization is guarded by a function attribute because it may produce + // unexpected results. Ie, programs may be relying on the platform-specific + // undefined behavior when the float-to-int conversion overflows. + const Function &F = DAG.getMachineFunction().getFunction(); + Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow"); + if (StrictOverflow.getValueAsString().equals("false")) + return SDValue(); + + // We only do this if the target has legal ftrunc. Otherwise, we'd likely be + // replacing casts with a libcall. We also must be allowed to ignore -0.0 + // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer + // conversions would return +0.0. + // FIXME: We should be able to use node-level FMF here. + // TODO: If strict math, should we use FABS (+ range check for signed cast)? + EVT VT = N->getValueType(0); + if (!TLI.isOperationLegal(ISD::FTRUNC, VT) || + !DAG.getTarget().Options.NoSignedZerosFPMath) + return SDValue(); + + // fptosi/fptoui round towards zero, so converting from FP to integer and + // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X + SDValue N0 = N->getOperand(0); + if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT && + N0.getOperand(0).getValueType() == VT) + return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0)); + + if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT && + N0.getOperand(0).getValueType() == VT) + return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0)); + + return SDValue(); +} + SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -10673,6 +11420,9 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { } } + if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI)) + return FTrunc; + return SDValue(); } @@ -10712,6 +11462,9 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { } } + if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI)) + return FTrunc; + return SDValue(); } @@ -11118,16 +11871,22 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { N1.getOperand(0), N1.getOperand(1), N2); } - if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) || - ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && - (N1.getOperand(0).hasOneUse() && - N1.getOperand(0).getOpcode() == ISD::SRL))) { - SDNode *Trunc = nullptr; - if (N1.getOpcode() == ISD::TRUNCATE) { - // Look pass the truncate. - Trunc = N1.getNode(); - N1 = N1.getOperand(0); - } + if (N1.hasOneUse()) { + if (SDValue NewN1 = rebuildSetCC(N1)) + return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2); + } + + return SDValue(); +} + +SDValue DAGCombiner::rebuildSetCC(SDValue N) { + if (N.getOpcode() == ISD::SRL || + (N.getOpcode() == ISD::TRUNCATE && + (N.getOperand(0).hasOneUse() && + N.getOperand(0).getOpcode() == ISD::SRL))) { + // Look pass the truncate. + if (N.getOpcode() == ISD::TRUNCATE) + N = N.getOperand(0); // Match this pattern so that we can generate simpler code: // @@ -11146,74 +11905,55 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // This applies only when the AND constant value has one bit set and the // SRL constant is equal to the log2 of the AND constant. The back-end is // smart enough to convert the result into a TEST/JMP sequence. 
- SDValue Op0 = N1.getOperand(0); - SDValue Op1 = N1.getOperand(1); + SDValue Op0 = N.getOperand(0); + SDValue Op1 = N.getOperand(1); - if (Op0.getOpcode() == ISD::AND && - Op1.getOpcode() == ISD::Constant) { + if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) { SDValue AndOp1 = Op0.getOperand(1); if (AndOp1.getOpcode() == ISD::Constant) { const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue(); if (AndConst.isPowerOf2() && - cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { + cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) { SDLoc DL(N); - SDValue SetCC = - DAG.getSetCC(DL, - getSetCCResultType(Op0.getValueType()), - Op0, DAG.getConstant(0, DL, Op0.getValueType()), - ISD::SETNE); - - SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL, - MVT::Other, Chain, SetCC, N2); - // Don't add the new BRCond into the worklist or else SimplifySelectCC - // will convert it back to (X & C1) >> C2. - CombineTo(N, NewBRCond, false); - // Truncate is dead. - if (Trunc) - deleteAndRecombine(Trunc); - // Replace the uses of SRL with SETCC - WorklistRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, SetCC); - deleteAndRecombine(N1.getNode()); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()), + Op0, DAG.getConstant(0, DL, Op0.getValueType()), + ISD::SETNE); } } } - - if (Trunc) - // Restore N1 if the above transformation doesn't match. - N1 = N->getOperand(1); } // Transform br(xor(x, y)) -> br(x != y) // Transform br(xor(xor(x,y), 1)) -> br (x == y) - if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { - SDNode *TheXor = N1.getNode(); + if (N.getOpcode() == ISD::XOR) { + // Because we may call this on a speculatively constructed + // SimplifiedSetCC Node, we need to simplify this node first. + // Ideally this should be folded into SimplifySetCC and not + // here. For now, grab a handle to N so we don't lose it from + // replacements interal to the visit. + HandleSDNode XORHandle(N); + while (N.getOpcode() == ISD::XOR) { + SDValue Tmp = visitXOR(N.getNode()); + // No simplification done. + if (!Tmp.getNode()) + break; + // Returning N is form in-visit replacement that may invalidated + // N. Grab value from Handle. + if (Tmp.getNode() == N.getNode()) + N = XORHandle.getValue(); + else // Node simplified. Try simplifying again. + N = Tmp; + } + + if (N.getOpcode() != ISD::XOR) + return N; + + SDNode *TheXor = N.getNode(); + SDValue Op0 = TheXor->getOperand(0); SDValue Op1 = TheXor->getOperand(1); - if (Op0.getOpcode() == Op1.getOpcode()) { - // Avoid missing important xor optimizations. - if (SDValue Tmp = visitXOR(TheXor)) { - if (Tmp.getNode() != TheXor) { - DEBUG(dbgs() << "\nReplacing.8 "; - TheXor->dump(&DAG); - dbgs() << "\nWith: "; - Tmp.getNode()->dump(&DAG); - dbgs() << '\n'); - WorklistRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, Tmp); - deleteAndRecombine(TheXor); - return DAG.getNode(ISD::BRCOND, SDLoc(N), - MVT::Other, Chain, Tmp, N2); - } - - // visitXOR has changed XOR's operands or replaced the XOR completely, - // bail out. 
- return SDValue(N, 0); - } - } if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { bool Equal = false; @@ -11223,19 +11963,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Equal = true; } - EVT SetCCVT = N1.getValueType(); + EVT SetCCVT = N.getValueType(); if (LegalTypes) SetCCVT = getSetCCResultType(SetCCVT); - SDValue SetCC = DAG.getSetCC(SDLoc(TheXor), - SetCCVT, - Op0, Op1, - Equal ? ISD::SETEQ : ISD::SETNE); // Replace the uses of XOR with SETCC - WorklistRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, SetCC); - deleteAndRecombine(N1.getNode()); - return DAG.getNode(ISD::BRCOND, SDLoc(N), - MVT::Other, Chain, SetCC, N2); + return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1, + Equal ? ISD::SETEQ : ISD::SETNE); } } @@ -11467,11 +12200,8 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PreIndexedNodes; ++NodesCombined; - DEBUG(dbgs() << "\nReplacing.4 "; - N->dump(&DAG); - dbgs() << "\nWith: "; - Result.getNode()->dump(&DAG); - dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: "; + Result.getNode()->dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); @@ -11636,11 +12366,9 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; - DEBUG(dbgs() << "\nReplacing.5 "; - N->dump(&DAG); - dbgs() << "\nWith: "; - Result.getNode()->dump(&DAG); - dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); + dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); + dbgs() << '\n'); WorklistRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); @@ -11664,7 +12392,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { return false; } -/// \brief Return the base-pointer arithmetic from an indexed \p LD. +/// Return the base-pointer arithmetic from an indexed \p LD. SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { ISD::MemIndexedMode AM = LD->getAddressingMode(); assert(AM != ISD::UNINDEXED); @@ -11706,11 +12434,9 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // v3 = add v2, c // Now we replace use of chain2 with chain1. This makes the second load // isomorphic to the one we are deleting, and thus makes this load live. - DEBUG(dbgs() << "\nReplacing.6 "; - N->dump(&DAG); - dbgs() << "\nWith chain: "; - Chain.getNode()->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG); + dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG); + dbgs() << "\n"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); AddUsersToWorklist(Chain.getNode()); @@ -11741,11 +12467,9 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { AddUsersToWorklist(N); } else Index = DAG.getUNDEF(N->getValueType(1)); - DEBUG(dbgs() << "\nReplacing.7 "; - N->dump(&DAG); - dbgs() << "\nWith: "; - Undef.getNode()->dump(&DAG); - dbgs() << " and 2 other values\n"); + LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); + dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG); + dbgs() << " and 2 other values\n"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index); @@ -11773,13 +12497,14 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Try to infer better alignment information than the load already has. 
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > LD->getMemOperand()->getBaseAlignment()) { + if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) { SDValue NewLoad = DAG.getExtLoad( LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), Align, LD->getMemOperand()->getFlags(), LD->getAAInfo()); - if (NewLoad.getNode() != N) - return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); + // NewLoad will always be N as we are only refining the alignment + assert(NewLoad.getNode() == N); + (void)NewLoad; } } } @@ -11826,7 +12551,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { namespace { -/// \brief Helper structure used to slice a load in smaller loads. +/// Helper structure used to slice a load in smaller loads. /// Basically a slice is obtained from the following sequence: /// Origin = load Ty1, Base /// Shift = srl Ty1 Origin, CstTy Amount @@ -11839,7 +12564,7 @@ namespace { /// SliceTy is deduced from the number of bits that are actually used to /// build Inst. struct LoadedSlice { - /// \brief Helper structure used to compute the cost of a slice. + /// Helper structure used to compute the cost of a slice. struct Cost { /// Are we optimizing for code size. bool ForCodeSize; @@ -11853,7 +12578,7 @@ struct LoadedSlice { Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {} - /// \brief Get the cost of one isolated slice. + /// Get the cost of one isolated slice. Cost(const LoadedSlice &LS, bool ForCodeSize = false) : ForCodeSize(ForCodeSize), Loads(1) { EVT TruncType = LS.Inst->getValueType(0); @@ -11863,7 +12588,7 @@ struct LoadedSlice { ZExts = 1; } - /// \brief Account for slicing gain in the current cost. + /// Account for slicing gain in the current cost. /// Slicing provide a few gains like removing a shift or a /// truncate. This method allows to grow the cost of the original /// load with the gain from this slice. @@ -11936,7 +12661,7 @@ struct LoadedSlice { unsigned Shift = 0, SelectionDAG *DAG = nullptr) : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} - /// \brief Get the bits used in a chunk of bits \p BitWidth large. + /// Get the bits used in a chunk of bits \p BitWidth large. /// \return Result is \p BitWidth and has used bits set to 1 and /// not used bits set to 0. APInt getUsedBits() const { @@ -11956,14 +12681,14 @@ struct LoadedSlice { return UsedBits; } - /// \brief Get the size of the slice to be loaded in bytes. + /// Get the size of the slice to be loaded in bytes. unsigned getLoadedSize() const { unsigned SliceSize = getUsedBits().countPopulation(); assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte."); return SliceSize / 8; } - /// \brief Get the type that will be loaded for this slice. + /// Get the type that will be loaded for this slice. /// Note: This may not be the final type for the slice. EVT getLoadedType() const { assert(DAG && "Missing context"); @@ -11971,7 +12696,7 @@ struct LoadedSlice { return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8); } - /// \brief Get the alignment of the load used for this slice. + /// Get the alignment of the load used for this slice. unsigned getAlignment() const { unsigned Alignment = Origin->getAlignment(); unsigned Offset = getOffsetFromBase(); @@ -11980,7 +12705,7 @@ struct LoadedSlice { return Alignment; } - /// \brief Check if this slice can be rewritten with legal operations. + /// Check if this slice can be rewritten with legal operations. 
bool isLegal() const { // An invalid slice is not legal. if (!Origin || !Inst || !DAG) @@ -12024,7 +12749,7 @@ struct LoadedSlice { return true; } - /// \brief Get the offset in bytes of this slice in the original chunk of + /// Get the offset in bytes of this slice in the original chunk of /// bits. /// \pre DAG != nullptr. uint64_t getOffsetFromBase() const { @@ -12045,7 +12770,7 @@ struct LoadedSlice { return Offset; } - /// \brief Generate the sequence of instructions to load the slice + /// Generate the sequence of instructions to load the slice /// represented by this object and redirect the uses of this slice to /// this new sequence of instructions. /// \pre this->Inst && this->Origin are valid Instructions and this @@ -12083,7 +12808,7 @@ struct LoadedSlice { return LastInst; } - /// \brief Check if this slice can be merged with an expensive cross register + /// Check if this slice can be merged with an expensive cross register /// bank copy. E.g., /// i = load i32 /// f = bitcast i32 i to float @@ -12132,7 +12857,7 @@ struct LoadedSlice { } // end anonymous namespace -/// \brief Check that all bits set in \p UsedBits form a dense region, i.e., +/// Check that all bits set in \p UsedBits form a dense region, i.e., /// \p UsedBits looks like 0..0 1..1 0..0. static bool areUsedBitsDense(const APInt &UsedBits) { // If all the bits are one, this is dense! @@ -12148,7 +12873,7 @@ static bool areUsedBitsDense(const APInt &UsedBits) { return NarrowedUsedBits.isAllOnesValue(); } -/// \brief Check whether or not \p First and \p Second are next to each other +/// Check whether or not \p First and \p Second are next to each other /// in memory. This means that there is no hole between the bits loaded /// by \p First and the bits loaded by \p Second. static bool areSlicesNextToEachOther(const LoadedSlice &First, @@ -12162,7 +12887,7 @@ static bool areSlicesNextToEachOther(const LoadedSlice &First, return areUsedBitsDense(UsedBits); } -/// \brief Adjust the \p GlobalLSCost according to the target +/// Adjust the \p GlobalLSCost according to the target /// paring capabilities and the layout of the slices. /// \pre \p GlobalLSCost should account for at least as many loads as /// there is in the slices in \p LoadedSlices. @@ -12175,8 +12900,8 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, // Sort the slices so that elements that are likely to be next to each // other in memory are next to each other in the list. - std::sort(LoadedSlices.begin(), LoadedSlices.end(), - [](const LoadedSlice &LHS, const LoadedSlice &RHS) { + llvm::sort(LoadedSlices.begin(), LoadedSlices.end(), + [](const LoadedSlice &LHS, const LoadedSlice &RHS) { assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); }); @@ -12223,7 +12948,7 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, } } -/// \brief Check the profitability of all involved LoadedSlice. +/// Check the profitability of all involved LoadedSlice. /// Currently, it is considered profitable if there is exactly two /// involved slices (1) which are (2) next to each other in memory, and /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3). 
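A minimal sketch of the memory-level equivalence the LoadedSlice rewrite relies on, in plain C++ rather than SelectionDAG code: a wide load consumed only as trunc / trunc(lshr) pieces can be replaced by independent narrow loads at adjusted offsets. The byte values here are arbitrary and the offsets assume a little-endian layout (the slicing code itself adjusts the offset for big-endian targets).

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  unsigned char Mem[4] = {0x78, 0x56, 0x34, 0x12}; // bytes at the load address

  // Original pattern: one i32 load, then trunc and trunc(lshr ..., 16).
  uint32_t Wide;
  std::memcpy(&Wide, Mem, sizeof(Wide));
  uint16_t Lo = static_cast<uint16_t>(Wide);
  uint16_t Hi = static_cast<uint16_t>(Wide >> 16);

  // Sliced form: two i16 loads at byte offsets 0 and 2.
  uint16_t SliceLo, SliceHi;
  std::memcpy(&SliceLo, Mem + 0, sizeof(SliceLo));
  std::memcpy(&SliceHi, Mem + 2, sizeof(SliceHi));

  assert(Lo == SliceLo && Hi == SliceHi); // holds on a little-endian host
  return 0;
}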
@@ -12267,7 +12992,7 @@ static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices, return OrigCost > GlobalSlicingCost; } -/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) +/// If the given load, \p LI, is used only by trunc or trunc(lshr) /// operations, split it in the various pieces being extracted. /// /// This sort of thing is introduced by SROA. @@ -12386,22 +13111,6 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. - // The store should be chained directly to the load or be an operand of a - // tokenfactor. - if (LD == Chain.getNode()) - ; // ok. - else if (Chain->getOpcode() != ISD::TokenFactor) - return Result; // Fail. - else { - bool isOk = false; - for (const SDValue &ChainOp : Chain->op_values()) - if (ChainOp.getNode() == LD) { - isOk = true; - break; - } - if (!isOk) return Result; - } - // This only handles simple types. if (V.getValueType() != MVT::i16 && V.getValueType() != MVT::i32 && @@ -12438,6 +13147,24 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { // is aligned the same as the access width. if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; + // For narrowing to be valid, it must be the case that the load the + // immediately preceeding memory operation before the store. + if (LD == Chain.getNode()) + ; // ok. + else if (Chain->getOpcode() == ISD::TokenFactor && + SDValue(LD, 1).hasOneUse()) { + // LD has only 1 chain use so they are no indirect dependencies. + bool isOk = false; + for (const SDValue &ChainOp : Chain->op_values()) + if (ChainOp.getNode() == LD) { + isOk = true; + break; + } + if (!isOk) + return Result; + } else + return Result; // Fail. 
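The hunk above only relocates the chain test; the heart of CheckForMaskedLoad is still deriving, from the AND mask, which byte-aligned run of the stored value actually changes, so the store can be narrowed to just those bytes. A rough standalone sketch of that derivation for a 32-bit value follows; it deliberately omits the type and access-width checks the real code also performs, so treat it as an illustration of the bit manipulation only:

#include <bit>
#include <cstdint>
#include <optional>
#include <utility>

// Given the mask of a ((load & Mask) | ...) pattern feeding a store, return
// {bytes actually changed, byte offset of those bytes} when the cleared bits
// of Mask form one byte-aligned contiguous run; nullopt otherwise.
std::optional<std::pair<unsigned, unsigned>> maskedByteRun(uint32_t Mask) {
  uint32_t NotMask = ~Mask;
  if (NotMask == 0)
    return std::nullopt;                  // nothing is overwritten
  unsigned LZ = std::countl_zero(NotMask);
  unsigned TZ = std::countr_zero(NotMask);
  if ((LZ % 8) || (TZ % 8))
    return std::nullopt;                  // run is not byte aligned
  uint32_t Run = NotMask >> TZ;
  if (Run & (Run + 1))
    return std::nullopt;                  // cleared bits have a hole
  unsigned MaskedBytes = (32 - LZ - TZ) / 8;
  return std::make_pair(MaskedBytes, TZ / 8);
}

For Mask = 0xff0000ff the cleared run covers bytes 1 and 2, so the function reports two bytes at offset 1.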
+ Result.first = MaskedBytes; Result.second = NotMaskTZ/8; return Result; @@ -12756,12 +13483,6 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, return false; } -static SDValue peekThroughBitcast(SDValue V) { - while (V.getOpcode() == ISD::BITCAST) - V = V.getOperand(0); - return V; -} - SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) { SmallVector<SDValue, 8> Chains; @@ -12886,6 +13607,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); SDValue Val = St->getValue(); + Val = peekThroughBitcast(Val); StoreInt <<= ElementSizeBits; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { StoreInt |= C->getAPIntValue() @@ -12918,13 +13640,13 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( FirstInChain->getPointerInfo(), FirstInChain->getAlignment()); } else { // Must be realized as a trunc store - EVT LegalizedStoredValueTy = + EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); - unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits(); + unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits(); ConstantSDNode *C = cast<ConstantSDNode>(StoredVal); SDValue ExtendedStoreVal = DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL, - LegalizedStoredValueTy); + LegalizedStoredValTy); NewStore = DAG.getTruncStore( NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, @@ -12941,7 +13663,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( } void DAGCombiner::getStoreMergeCandidates( - StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) { + StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes, + SDNode *&RootNode) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); @@ -12970,6 +13693,12 @@ void DAGCombiner::getStoreMergeCandidates( // Load and store should be the same type. if (MemVT != LoadVT) return; + // Loads must only have one use. + if (!Ld->hasNUsesOfValue(1, 0)) + return; + // The memory operands must not be volatile. + if (Ld->isVolatile() || Ld->isIndexed()) + return; } auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, int64_t &Offset) -> bool { @@ -12987,6 +13716,12 @@ void DAGCombiner::getStoreMergeCandidates( auto LPtr = BaseIndexOffset::match(OtherLd, DAG); if (LoadVT != OtherLd->getMemoryVT()) return false; + // Loads must only have one use. + if (!OtherLd->hasNUsesOfValue(1, 0)) + return false; + // The memory operands must not be volatile. + if (OtherLd->isVolatile() || OtherLd->isIndexed()) + return false; if (!(LBasePtr.equalBaseIndex(LPtr, DAG))) return false; } else @@ -13028,7 +13763,7 @@ void DAGCombiner::getStoreMergeCandidates( // FIXME: We should be able to climb and // descend TokenFactors to find candidates as well. - SDNode *RootNode = (St->getChain()).getNode(); + RootNode = St->getChain().getNode(); if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) { RootNode = Ldn->getChain().getNode(); @@ -13059,31 +13794,54 @@ void DAGCombiner::getStoreMergeCandidates( // through the chain). Check in parallel by searching up from // non-chain operands of candidates. 
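The comment above is the crux of the candidate-dependency problem: merging stores fuses their chain dependencies, so the merge is only safe if no candidate is reachable from another candidate through non-chain (value) operands. A hedged sketch of that bounded reachability check on a toy node type; the real code walks SDNode operands, peeks through TokenFactors while seeding the visited set from RootNode, and does not count those pruning nodes against the limit:

#include <unordered_set>
#include <vector>

struct Node { std::vector<Node *> Operands; };

// True if merging the candidates could form a cycle, i.e. some candidate is
// reachable from another candidate's operands within the search budget.
bool wouldCreateCycle(const std::vector<Node *> &Candidates, Node *Root,
                      unsigned MaxVisited = 1024) {
  std::unordered_set<Node *> Visited;
  std::vector<Node *> Worklist;
  // Every candidate already depends on Root through its chain, so the
  // search may stop there: pre-mark it as visited.
  Visited.insert(Root);
  std::unordered_set<Node *> CandidateSet(Candidates.begin(), Candidates.end());
  for (Node *C : Candidates)
    for (Node *Op : C->Operands)          // non-chain operands in the real code
      if (Visited.insert(Op).second)
        Worklist.push_back(Op);
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    if (CandidateSet.count(N))
      return true;                        // a candidate feeds another candidate
    if (Visited.size() > MaxVisited)
      return true;                        // budget exhausted: assume the worst
    for (Node *Op : N->Operands)
      if (Visited.insert(Op).second)
        Worklist.push_back(Op);
  }
  return false;
}

Returning true when the visited-node cap is hit keeps the check conservative: when in doubt, the merge is simply not performed.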
bool DAGCombiner::checkMergeStoreCandidatesForDependencies( - SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) { + SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores, + SDNode *RootNode) { // FIXME: We should be able to truncate a full search of // predecessors by doing a BFS and keeping tabs the originating // stores from which worklist nodes come from in a similar way to // TokenFactor simplfication. - SmallPtrSet<const SDNode *, 16> Visited; + SmallPtrSet<const SDNode *, 32> Visited; SmallVector<const SDNode *, 8> Worklist; - unsigned int Max = 8192; + + // RootNode is a predecessor to all candidates so we need not search + // past it. Add RootNode (peeking through TokenFactors). Do not count + // these towards size check. + + Worklist.push_back(RootNode); + while (!Worklist.empty()) { + auto N = Worklist.pop_back_val(); + if (N->getOpcode() == ISD::TokenFactor) { + for (SDValue Op : N->ops()) + Worklist.push_back(Op.getNode()); + } + Visited.insert(N); + } + + // Don't count pruning nodes towards max. + unsigned int Max = 1024 + Visited.size(); // Search Ops of store candidates. for (unsigned i = 0; i < NumStores; ++i) { - SDNode *n = StoreNodes[i].MemNode; - // Potential loops may happen only through non-chain operands - for (unsigned j = 1; j < n->getNumOperands(); ++j) - Worklist.push_back(n->getOperand(j).getNode()); + SDNode *N = StoreNodes[i].MemNode; + // Of the 4 Store Operands: + // * Chain (Op 0) -> We have already considered these + // in candidate selection and can be + // safely ignored + // * Value (Op 1) -> Cycles may happen (e.g. through load chains) + // * Address (Op 2) -> Merged addresses may only vary by a fixed constant + // and so no cycles are possible. + // * (Op 3) -> appears to always be undef. Cannot be source of cycle. + // + // Thus we need only check predecessors of the value operands. + auto *Op = N->getOperand(1).getNode(); + if (Visited.insert(Op).second) + Worklist.push_back(Op); } // Search through DAG. We can stop early if we find a store node. - for (unsigned i = 0; i < NumStores; ++i) { + for (unsigned i = 0; i < NumStores; ++i) if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist, Max)) return false; - // Check if we ended early, failing conservatively if so. - if (Visited.size() >= Max) - return false; - } return true; } @@ -13121,8 +13879,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { return false; SmallVector<MemOpLink, 8> StoreNodes; + SDNode *RootNode; // Find potential store merge candidates by searching through chain sub-DAG - getStoreMergeCandidates(St, StoreNodes); + getStoreMergeCandidates(St, StoreNodes, RootNode); // Check if there is anything to merge. if (StoreNodes.size() < 2) @@ -13130,10 +13889,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // Sort the memory operands according to their distance from the // base pointer. - std::sort(StoreNodes.begin(), StoreNodes.end(), - [](MemOpLink LHS, MemOpLink RHS) { - return LHS.OffsetFromBase < RHS.OffsetFromBase; - }); + llvm::sort(StoreNodes.begin(), StoreNodes.end(), + [](MemOpLink LHS, MemOpLink RHS) { + return LHS.OffsetFromBase < RHS.OffsetFromBase; + }); // Store Merge attempts to merge the lowest stores. 
This generally // works out as if successful, as the remaining stores are checked @@ -13177,178 +13936,191 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { continue; } - // Check that we can merge these candidates without causing a cycle - if (!checkMergeStoreCandidatesForDependencies(StoreNodes, - NumConsecutiveStores)) { - StoreNodes.erase(StoreNodes.begin(), - StoreNodes.begin() + NumConsecutiveStores); - continue; - } - // The node with the lowest store address. LLVMContext &Context = *DAG.getContext(); const DataLayout &DL = DAG.getDataLayout(); // Store the constants into memory as one consecutive store. if (IsConstantSrc) { - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - unsigned LastLegalType = 1; - unsigned LastLegalVectorType = 1; - bool LastIntegerTrunc = false; - bool NonZero = false; - unsigned FirstZeroAfterNonZero = NumConsecutiveStores; - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode); - SDValue StoredVal = ST->getValue(); - bool IsElementZero = false; - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) - IsElementZero = C->isNullValue(); - else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) - IsElementZero = C->getConstantFPValue()->isNullValue(); - if (IsElementZero) { - if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) - FirstZeroAfterNonZero = i; - } - NonZero |= !IsElementZero; + while (NumConsecutiveStores >= 2) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned LastLegalType = 1; + unsigned LastLegalVectorType = 1; + bool LastIntegerTrunc = false; + bool NonZero = false; + unsigned FirstZeroAfterNonZero = NumConsecutiveStores; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue StoredVal = ST->getValue(); + bool IsElementZero = false; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) + IsElementZero = C->isNullValue(); + else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) + IsElementZero = C->getConstantFPValue()->isNullValue(); + if (IsElementZero) { + if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) + FirstZeroAfterNonZero = i; + } + NonZero |= !IsElementZero; - // Find a legal type for the constant store. - unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; - EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); - bool IsFast = false; - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFast) && - IsFast) { - LastIntegerTrunc = false; - LastLegalType = i + 1; - // Or check whether a truncstore is legal. - } else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { - EVT LegalizedStoredValueTy = - TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); - if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) && + // Find a legal type for the constant store. + unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; + EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); + bool IsFast = false; + + // Break early when size is too large to be legal. 
+ if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) + break; + + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { - LastIntegerTrunc = true; + LastIntegerTrunc = false; LastLegalType = i + 1; + // Or check whether a truncstore is legal. + } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValTy = + TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); + if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFast) && + IsFast) { + LastIntegerTrunc = true; + LastLegalType = i + 1; + } } - } - // We only use vectors if the constant is known to be zero or the target - // allows it and the function is not marked with the noimplicitfloat - // attribute. - if ((!NonZero || - TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && - !NoVectors) { - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && - TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && - TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, - FirstStoreAlign, &IsFast) && - IsFast) - LastLegalVectorType = i + 1; + // We only use vectors if the constant is known to be zero or the + // target allows it and the function is not marked with the + // noimplicitfloat attribute. + if ((!NonZero || + TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && + !NoVectors) { + // Find a legal type for the vector store. + unsigned Elts = (i + 1) * NumMemElts; + EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && + IsFast) + LastLegalVectorType = i + 1; + } } - } - bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; - unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; + bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; + unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; + + // Check if we found a legal integer type that creates a meaningful + // merge. + if (NumElem < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved or we've dropped a non-zero value. Drop as many + // candidates as we can here. + unsigned NumSkip = 1; + while ( + (NumSkip < NumConsecutiveStores) && + (NumSkip < FirstZeroAfterNonZero) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } - // Check if we found a legal integer type that creates a meaningful merge. - if (NumElem < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. 
The only - // reason a merge of size N could have failed where another of - // the same size would not have, is if the alignment has - // improved or we've dropped a non-zero value. Drop as many - // candidates as we can here. - unsigned NumSkip = 1; - while ( - (NumSkip < NumConsecutiveStores) && - (NumSkip < FirstZeroAfterNonZero) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) { - NumSkip++; + // Check that we can merge these candidates without causing a cycle. + if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, + RootNode)) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + continue; } - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - continue; - } - bool Merged = MergeStoresOfConstantsOrVecElts( - StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc); - RV |= Merged; + RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true, + UseVector, LastIntegerTrunc); - // Remove merged stores for next iteration. - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + // Remove merged stores for next iteration. + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + } continue; } // When extracting multiple vector elements, try to store them // in one vector store rather than a sequence of scalar stores. if (IsExtractVecSrc) { - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - unsigned NumStoresToMerge = 1; - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - SDValue StVal = peekThroughBitcast(St->getValue()); - // This restriction could be loosened. - // Bail out if any stored values are not elements extracted from a - // vector. It should be possible to handle mixed sources, but load - // sources need more careful handling (see the block of code below that - // handles consecutive loads). - if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT && - StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR) - return RV; + // Loop on Consecutive Stores on success. + while (NumConsecutiveStores >= 2) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned NumStoresToMerge = 1; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + // Find a legal type for the vector store. + unsigned Elts = (i + 1) * NumMemElts; + EVT Ty = + EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); + bool IsFast; - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT Ty = - EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); - bool IsFast; - if (TLI.isTypeLegal(Ty) && - TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && - TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, - FirstStoreAlign, &IsFast) && - IsFast) - NumStoresToMerge = i + 1; - } + // Break early when size is too large to be legal. + if (Ty.getSizeInBits() > MaximumLegalStoreInBits) + break; - // Check if we found a legal integer type that creates a meaningful merge. - if (NumStoresToMerge < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. 
The only - // reason a merge of size N could have failed where another of - // the same size would not have, is if the alignment has - // improved. Drop as many candidates as we can here. - unsigned NumSkip = 1; - while ((NumSkip < NumConsecutiveStores) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) - NumSkip++; + if (TLI.isTypeLegal(Ty) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && + IsFast) + NumStoresToMerge = i + 1; + } - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - continue; - } + // Check if we found a legal integer type creating a meaningful + // merge. + if (NumStoresToMerge < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved. Drop as many candidates as we can here. + unsigned NumSkip = 1; + while ( + (NumSkip < NumConsecutiveStores) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } + + // Check that we can merge these candidates without causing a cycle. + if (!checkMergeStoreCandidatesForDependencies( + StoreNodes, NumStoresToMerge, RootNode)) { + StoreNodes.erase(StoreNodes.begin(), + StoreNodes.begin() + NumStoresToMerge); + NumConsecutiveStores -= NumStoresToMerge; + continue; + } + + RV |= MergeStoresOfConstantsOrVecElts( + StoreNodes, MemVT, NumStoresToMerge, false, true, false); - bool Merged = MergeStoresOfConstantsOrVecElts( - StoreNodes, MemVT, NumStoresToMerge, false, true, false); - if (!Merged) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge); - continue; + NumConsecutiveStores -= NumStoresToMerge; } - // Remove merged stores for next iteration. - StoreNodes.erase(StoreNodes.begin(), - StoreNodes.begin() + NumStoresToMerge); - RV = true; continue; } @@ -13362,24 +14134,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // Find acceptable loads. Loads need to have the same chain (token factor), // must not be zext, volatile, indexed, and they must be consecutive. BaseIndexOffset LdBasePtr; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); SDValue Val = peekThroughBitcast(St->getValue()); - LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val); - if (!Ld) - break; - - // Loads must only have one use. - if (!Ld->hasNUsesOfValue(1, 0)) - break; - - // The memory operands must not be volatile. - if (Ld->isVolatile() || Ld->isIndexed()) - break; - - // The stored memory type must be the same. - if (Ld->getMemoryVT() != MemVT) - break; + LoadSDNode *Ld = cast<LoadSDNode>(Val); BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG); // If this is not the first ptr that we check. @@ -13397,90 +14156,75 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { LoadNodes.push_back(MemOpLink(Ld, LdOffset)); } - if (LoadNodes.size() < 2) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); - continue; - } + while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { + // If we have load/store pair instructions and we only have two values, + // don't bother merging. 
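Both merge paths above now share the same retry shape: inside a while (NumConsecutiveStores >= 2) loop, scan forward from the first store, remember the longest prefix whose combined width the target can really store, break out early once the width passes MaximumLegalStoreInBits, then either merge that prefix or drop leading candidates and go around again. A toy sketch of the forward scan, with a made-up power-of-two rule standing in for the isTypeLegal/allowsMemoryAccess queries:

#include <bit>

// Longest prefix of NumStores consecutive ElementSizeBytes-wide stores whose
// combined width is "legal" (here: a power of two no wider than MaxLegalBits).
unsigned longestLegalPrefix(unsigned NumStores, unsigned ElementSizeBytes,
                            unsigned MaxLegalBits) {
  unsigned LastLegal = 1;
  for (unsigned i = 0; i < NumStores; ++i) {
    unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
    if (SizeInBits > MaxLegalBits)
      break;                              // break early, as in the patch
    if (std::has_single_bit(SizeInBits))  // stand-in for the TLI queries
      LastLegal = i + 1;
  }
  return LastLegal;
}

longestLegalPrefix(6, 2, 64), for instance, settles on 4 stores (64 bits): the 48-bit prefix is rejected by the toy legality rule and the 80-bit prefix trips the early break.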
+ unsigned RequiredAlignment; + if (LoadNodes.size() == 2 && + TLI.hasPairedLoad(MemVT, RequiredAlignment) && + StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2); + break; + } + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); + unsigned FirstLoadAS = FirstLoad->getAddressSpace(); + unsigned FirstLoadAlign = FirstLoad->getAlignment(); - // If we have load/store pair instructions and we only have two values, - // don't bother merging. - unsigned RequiredAlignment; - if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && - StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); - continue; - } - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); - unsigned FirstLoadAS = FirstLoad->getAddressSpace(); - unsigned FirstLoadAlign = FirstLoad->getAlignment(); + // Scan the memory operations on the chain and find the first + // non-consecutive load memory address. These variables hold the index in + // the store node array. - // Scan the memory operations on the chain and find the first - // non-consecutive load memory address. These variables hold the index in - // the store node array. - unsigned LastConsecutiveLoad = 1; - // This variable refers to the size and not index in the array. - unsigned LastLegalVectorType = 1; - unsigned LastLegalIntegerType = 1; - bool isDereferenceable = true; - bool DoIntegerTruncate = false; - StartAddress = LoadNodes[0].OffsetFromBase; - SDValue FirstChain = FirstLoad->getChain(); - for (unsigned i = 1; i < LoadNodes.size(); ++i) { - // All loads must share the same chain. - if (LoadNodes[i].MemNode->getChain() != FirstChain) - break; + unsigned LastConsecutiveLoad = 1; - int64_t CurrAddress = LoadNodes[i].OffsetFromBase; - if (CurrAddress - StartAddress != (ElementSizeBytes * i)) - break; - LastConsecutiveLoad = i; - - if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) - isDereferenceable = false; - - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - - bool IsFastSt, IsFastLd; - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFastSt) && - IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, - FirstLoadAlign, &IsFastLd) && - IsFastLd) { - LastLegalVectorType = i + 1; - } + // This variable refers to the size and not index in the array. + unsigned LastLegalVectorType = 1; + unsigned LastLegalIntegerType = 1; + bool isDereferenceable = true; + bool DoIntegerTruncate = false; + StartAddress = LoadNodes[0].OffsetFromBase; + SDValue FirstChain = FirstLoad->getChain(); + for (unsigned i = 1; i < LoadNodes.size(); ++i) { + // All loads must share the same chain. 
+ if (LoadNodes[i].MemNode->getChain() != FirstChain) + break; + + int64_t CurrAddress = LoadNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + LastConsecutiveLoad = i; + + if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) + isDereferenceable = false; + + // Find a legal type for the vector store. + unsigned Elts = (i + 1) * NumMemElts; + EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + + // Break early when size is too large to be legal. + if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) + break; + + bool IsFastSt, IsFastLd; + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && + IsFastLd) { + LastLegalVectorType = i + 1; + } - // Find a legal type for the integer store. - unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; - StoreTy = EVT::getIntegerVT(Context, SizeInBits); - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFastSt) && - IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, - FirstLoadAlign, &IsFastLd) && - IsFastLd) { - LastLegalIntegerType = i + 1; - DoIntegerTruncate = false; - // Or check whether a truncstore and extload is legal. - } else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { - EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy); - if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) && - TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, - StoreTy) && - TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, - StoreTy) && - TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && + // Find a legal type for the integer store. + unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; + StoreTy = EVT::getIntegerVT(Context, SizeInBits); + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFastSt) && IsFastSt && @@ -13488,105 +14232,140 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { FirstLoadAlign, &IsFastLd) && IsFastLd) { LastLegalIntegerType = i + 1; - DoIntegerTruncate = true; + DoIntegerTruncate = false; + // Or check whether a truncstore and extload is legal. + } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy); + if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, + StoreTy) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, + StoreTy) && + TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && + IsFastLd) { + LastLegalIntegerType = i + 1; + DoIntegerTruncate = true; + } } } - } - // Only use vector types if the vector type is larger than the integer type. 
- // If they are the same, use integers. - bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; - unsigned LastLegalType = - std::max(LastLegalVectorType, LastLegalIntegerType); - - // We add +1 here because the LastXXX variables refer to location while - // the NumElem refers to array/index size. - unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); - NumElem = std::min(LastLegalType, NumElem); - - if (NumElem < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. The only - // reason a merge of size N could have failed where another of - // the same size would not have is if the alignment or either - // the load or store has improved. Drop as many candidates as we - // can here. - unsigned NumSkip = 1; - while ((NumSkip < LoadNodes.size()) && - (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) - NumSkip++; - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - continue; - } + // Only use vector types if the vector type is larger than the integer + // type. If they are the same, use integers. + bool UseVectorTy = + LastLegalVectorType > LastLegalIntegerType && !NoVectors; + unsigned LastLegalType = + std::max(LastLegalVectorType, LastLegalIntegerType); - // Find if it is better to use vectors or integers to load and store - // to memory. - EVT JointMemOpVT; - if (UseVectorTy) { - // Find a legal type for the vector store. - unsigned Elts = NumElem * NumMemElts; - JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - } else { - unsigned SizeInBits = NumElem * ElementSizeBytes * 8; - JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); - } - - SDLoc LoadDL(LoadNodes[0].MemNode); - SDLoc StoreDL(StoreNodes[0].MemNode); - - // The merged loads are required to have the same incoming chain, so - // using the first's chain is acceptable. - - SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); - AddToWorklist(NewStoreChain.getNode()); - - MachineMemOperand::Flags MMOFlags = isDereferenceable ? - MachineMemOperand::MODereferenceable: - MachineMemOperand::MONone; - - SDValue NewLoad, NewStore; - if (UseVectorTy || !DoIntegerTruncate) { - NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), - FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), FirstLoadAlign, - MMOFlags); - NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), FirstStoreAlign); - } else { // This must be the truncstore/extload case - EVT ExtendedTy = - TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); - NewLoad = - DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(), - FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), - JointMemOpVT, FirstLoadAlign, MMOFlags); - NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), JointMemOpVT, - FirstInChain->getAlignment(), - FirstInChain->getMemOperand()->getFlags()); - } - - // Transfer chain users from old loads to the new load. - for (unsigned i = 0; i < NumElem; ++i) { - LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), - SDValue(NewLoad.getNode(), 1)); - } - - // Replace the all stores with the new store. 
Recursively remove - // corresponding value if its no longer used. - for (unsigned i = 0; i < NumElem; ++i) { - SDValue Val = StoreNodes[i].MemNode->getOperand(1); - CombineTo(StoreNodes[i].MemNode, NewStore); - if (Val.getNode()->use_empty()) - recursivelyDeleteUnusedNodes(Val.getNode()); - } - - RV = true; - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + // We add +1 here because the LastXXX variables refer to location while + // the NumElem refers to array/index size. + unsigned NumElem = + std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); + NumElem = std::min(LastLegalType, NumElem); + + if (NumElem < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have is if the alignment or either + // the load or store has improved. Drop as many candidates as we + // can here. + unsigned NumSkip = 1; + while ((NumSkip < LoadNodes.size()) && + (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } + + // Check that we can merge these candidates without causing a cycle. + if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, + RootNode)) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + continue; + } + + // Find if it is better to use vectors or integers to load and store + // to memory. + EVT JointMemOpVT; + if (UseVectorTy) { + // Find a legal type for the vector store. + unsigned Elts = NumElem * NumMemElts; + JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + } else { + unsigned SizeInBits = NumElem * ElementSizeBytes * 8; + JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); + } + + SDLoc LoadDL(LoadNodes[0].MemNode); + SDLoc StoreDL(StoreNodes[0].MemNode); + + // The merged loads are required to have the same incoming chain, so + // using the first's chain is acceptable. + + SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); + AddToWorklist(NewStoreChain.getNode()); + + MachineMemOperand::Flags MMOFlags = + isDereferenceable ? 
MachineMemOperand::MODereferenceable + : MachineMemOperand::MONone; + + SDValue NewLoad, NewStore; + if (UseVectorTy || !DoIntegerTruncate) { + NewLoad = + DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), + FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), + FirstLoadAlign, MMOFlags); + NewStore = DAG.getStore( + NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), FirstStoreAlign); + } else { // This must be the truncstore/extload case + EVT ExtendedTy = + TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); + NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, + FirstLoad->getChain(), FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), JointMemOpVT, + FirstLoadAlign, MMOFlags); + NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), + JointMemOpVT, FirstInChain->getAlignment(), + FirstInChain->getMemOperand()->getFlags()); + } + + // Transfer chain users from old loads to the new load. + for (unsigned i = 0; i < NumElem; ++i) { + LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), + SDValue(NewLoad.getNode(), 1)); + } + + // Replace the all stores with the new store. Recursively remove + // corresponding value if its no longer used. + for (unsigned i = 0; i < NumElem; ++i) { + SDValue Val = StoreNodes[i].MemNode->getOperand(1); + CombineTo(StoreNodes[i].MemNode, NewStore); + if (Val.getNode()->use_empty()) + recursivelyDeleteUnusedNodes(Val.getNode()); + } + + RV = true; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + } } return RV; } @@ -13728,13 +14507,14 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > ST->getAlignment()) { + if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) { SDValue NewStore = DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), Align, ST->getMemOperand()->getFlags(), ST->getAAInfo()); - if (NewStore.getNode() != N) - return CombineTo(ST, NewStore, true); + // NewStore will always be N as we are only refining the alignment + assert(NewStore.getNode() == N); + (void)NewStore; } } } @@ -14216,6 +14996,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue EltNo = N->getOperand(1); ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); + // extract_vector_elt of out-of-bounds element -> UNDEF + if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements())) + return DAG.getUNDEF(NVT); + // extract_vector_elt (build_vector x, y), 1 -> y if (ConstEltNo && InVec.getOpcode() == ISD::BUILD_VECTOR && @@ -14301,6 +15085,23 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } } + // If only EXTRACT_VECTOR_ELT nodes use the source vector we can + // simplify it based on the (valid) extraction indices. 
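The new combine above gathers the constant indices of every EXTRACT_VECTOR_ELT user into a lane mask for SimplifyDemandedVectorElts; out-of-range indices simply demand nothing, because the preceding hunk already folds such extracts to UNDEF. A small sketch of building that mask with plain containers rather than APInt (the shapes here are assumptions for illustration):

#include <optional>
#include <vector>

// Build a demanded-lane mask from the extract indices that use a vector of
// NumElts lanes.  A non-constant index means every lane may be needed, so
// the caller gets no mask back.
std::optional<std::vector<bool>>
demandedLanes(unsigned NumElts,
              const std::vector<std::optional<unsigned>> &ExtractIndices) {
  std::vector<bool> Demanded(NumElts, false);
  for (const auto &Idx : ExtractIndices) {
    if (!Idx)
      return std::nullopt;       // unknown index: demand everything
    if (*Idx < NumElts)
      Demanded[*Idx] = true;     // out-of-range extracts are undef anyway
  }
  return Demanded;
}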
+ if (llvm::all_of(InVec->uses(), [&](SDNode *Use) { + return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Use->getOperand(0) == InVec && + isa<ConstantSDNode>(Use->getOperand(1)); + })) { + APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements()); + for (SDNode *Use : InVec->uses()) { + auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1)); + if (CstElt->getAPIntValue().ult(VT.getVectorNumElements())) + DemandedElts.setBit(CstElt->getZExtValue()); + } + if (SimplifyDemandedVectorElts(InVec, DemandedElts, true)) + return SDValue(N, 0); + } + bool BCNumEltsChanged = false; EVT ExtVT = VT.getVectorElementType(); EVT LVT = ExtVT; @@ -14507,7 +15308,10 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { assert(VecVT.getSizeInBits() == VT.getSizeInBits() && "Invalid vector size"); // Check if the new vector type is legal. - if (!isTypeLegal(VecVT)) return SDValue(); + if (!isTypeLegal(VecVT) || + (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) && + TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))) + return SDValue(); // Make the new BUILD_VECTOR. SDValue BV = DAG.getBuildVector(VecVT, DL, Ops); @@ -14754,12 +15558,16 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { } // Not an undef or zero. If the input is something other than an - // EXTRACT_VECTOR_ELT with a constant index, bail out. + // EXTRACT_VECTOR_ELT with an in-range constant index, bail out. if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isa<ConstantSDNode>(Op.getOperand(1))) return SDValue(); SDValue ExtractedFromVec = Op.getOperand(0); + APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue(); + if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements())) + return SDValue(); + // All inputs must have the same element type as the output. if (VT.getVectorElementType() != ExtractedFromVec.getValueType().getVectorElementType()) @@ -14915,6 +15723,54 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { return Shuffles[0]; } +// Try to turn a build vector of zero extends of extract vector elts into a +// a vector zero extend and possibly an extract subvector. +// TODO: Support sign extend or any extend? +// TODO: Allow undef elements? +// TODO: Don't require the extracts to start at element 0. +SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) { + if (LegalOperations) + return SDValue(); + + EVT VT = N->getValueType(0); + + SDValue Op0 = N->getOperand(0); + auto checkElem = [&](SDValue Op) -> int64_t { + if (Op.getOpcode() == ISD::ZERO_EXTEND && + Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0)) + if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1))) + return C->getZExtValue(); + return -1; + }; + + // Make sure the first element matches + // (zext (extract_vector_elt X, C)) + int64_t Offset = checkElem(Op0); + if (Offset < 0) + return SDValue(); + + unsigned NumElems = N->getNumOperands(); + SDValue In = Op0.getOperand(0).getOperand(0); + EVT InSVT = In.getValueType().getScalarType(); + EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems); + + // Don't create an illegal input type after type legalization. + if (LegalTypes && !TLI.isTypeLegal(InVT)) + return SDValue(); + + // Ensure all the elements come from the same vector and are adjacent. 
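convertBuildVecZextToZext only fires when every build_vector operand is a zero-extended extract from the same source vector at consecutive lanes, starting at some offset. Reduced to data, the adjacency test looks like the sketch below, where ExtractElem is an invented summary of what checkElem pulls out of each operand:

#include <optional>
#include <vector>

struct ExtractElem { int SourceId; unsigned Lane; };

// Returns the starting lane when all elements come from one source and their
// lanes are consecutive; nullopt otherwise.
std::optional<unsigned>
consecutiveFromOneSource(const std::vector<ExtractElem> &Elems) {
  if (Elems.empty())
    return std::nullopt;
  unsigned Start = Elems[0].Lane;
  for (unsigned i = 1; i < Elems.size(); ++i)
    if (Elems[i].SourceId != Elems[0].SourceId ||
        Elems[i].Lane != Start + i)
      return std::nullopt;
  return Start;
}

Undef elements and sign or any extends are explicitly left for later, per the TODOs above the transform.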
+ for (unsigned i = 1; i != NumElems; ++i) { + if ((Offset + i) != checkElem(N->getOperand(i))) + return SDValue(); + } + + SDLoc DL(N); + In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In, + Op0.getOperand(0).getOperand(1)); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In); +} + SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { EVT VT = N->getValueType(0); @@ -14922,6 +15778,32 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); + // If this is a splat of a bitcast from another vector, change to a + // concat_vector. + // For example: + // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) -> + // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X)))) + // + // If X is a build_vector itself, the concat can become a larger build_vector. + // TODO: Maybe this is useful for non-splat too? + if (!LegalOperations) { + if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) { + Splat = peekThroughBitcast(Splat); + EVT SrcVT = Splat.getValueType(); + if (SrcVT.isVector()) { + unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements(); + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), + SrcVT.getVectorElementType(), NumElts); + if (!LegalTypes || TLI.isTypeLegal(NewVT)) { + SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat); + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), + NewVT, Ops); + return DAG.getBitcast(VT, Concat); + } + } + } + } + // Check if we can express BUILD VECTOR via subvector extract. if (!LegalTypes && (N->getNumOperands() > 1)) { SDValue Op0 = N->getOperand(0); @@ -14951,6 +15833,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { Op0.getOperand(0), Op0.getOperand(1)); } + if (SDValue V = convertBuildVecZextToZext(N)) + return V; + if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; @@ -15140,6 +16025,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) return SDValue(); + // Bail out if the vector size is not a multiple of the scalar size. + if (VT.getSizeInBits() % SclTy.getSizeInBits()) + return SDValue(); + unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits(); if (VNTNumElms < 2) return SDValue(); @@ -15418,13 +16307,22 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // Only do this if we won't split any elements. if (ExtractSize % EltSize == 0) { unsigned NumElems = ExtractSize / EltSize; - EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), NumElems); - if ((!LegalOperations || - TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) && + EVT EltVT = InVT.getVectorElementType(); + EVT ExtractVT = NumElems == 1 ? EltVT : + EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems); + if ((Level < AfterLegalizeDAG || + (NumElems == 1 || + TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) && (!LegalTypes || TLI.isTypeLegal(ExtractVT))) { unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) / EltSize; + if (NumElems == 1) { + SDValue Src = V->getOperand(IdxVal); + if (EltVT != Src.getValueType()) + Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src); + + return DAG.getBitcast(NVT, Src); + } // Extract the pieces from the original build_vector. 
SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N), @@ -15466,122 +16364,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG)) return NarrowBOp; - return SDValue(); -} - -static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements, - SDValue V, SelectionDAG &DAG) { - SDLoc DL(V); - EVT VT = V.getValueType(); - - switch (V.getOpcode()) { - default: - return V; - - case ISD::CONCAT_VECTORS: { - EVT OpVT = V->getOperand(0).getValueType(); - int OpSize = OpVT.getVectorNumElements(); - SmallBitVector OpUsedElements(OpSize, false); - bool FoundSimplification = false; - SmallVector<SDValue, 4> NewOps; - NewOps.reserve(V->getNumOperands()); - for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) { - SDValue Op = V->getOperand(i); - bool OpUsed = false; - for (int j = 0; j < OpSize; ++j) - if (UsedElements[i * OpSize + j]) { - OpUsedElements[j] = true; - OpUsed = true; - } - NewOps.push_back( - OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG) - : DAG.getUNDEF(OpVT)); - FoundSimplification |= Op == NewOps.back(); - OpUsedElements.reset(); - } - if (FoundSimplification) - V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps); - return V; - } - - case ISD::INSERT_SUBVECTOR: { - SDValue BaseV = V->getOperand(0); - SDValue SubV = V->getOperand(1); - auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2)); - if (!IdxN) - return V; - - int SubSize = SubV.getValueType().getVectorNumElements(); - int Idx = IdxN->getZExtValue(); - bool SubVectorUsed = false; - SmallBitVector SubUsedElements(SubSize, false); - for (int i = 0; i < SubSize; ++i) - if (UsedElements[i + Idx]) { - SubVectorUsed = true; - SubUsedElements[i] = true; - UsedElements[i + Idx] = false; - } - - // Now recurse on both the base and sub vectors. - SDValue SimplifiedSubV = - SubVectorUsed - ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG) - : DAG.getUNDEF(SubV.getValueType()); - SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG); - if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV) - V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, - SimplifiedBaseV, SimplifiedSubV, V->getOperand(2)); - return V; - } - } -} - -static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, - SDValue N1, SelectionDAG &DAG) { - EVT VT = SVN->getValueType(0); - int NumElts = VT.getVectorNumElements(); - SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false); - for (int M : SVN->getMask()) - if (M >= 0 && M < NumElts) - N0UsedElements[M] = true; - else if (M >= NumElts) - N1UsedElements[M - NumElts] = true; - - SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG); - SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG); - if (S0 == N0 && S1 == N1) - return SDValue(); - - return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); -} - -static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0, - SDValue N1, SelectionDAG &DAG) { - auto isUndefElt = [](SDValue V, int Idx) { - // TODO - handle more cases as required. 
- if (V.getOpcode() == ISD::BUILD_VECTOR) - return V.getOperand(Idx).isUndef(); - if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) - return (Idx != 0) || V.getOperand(0).isUndef(); - return false; - }; - - EVT VT = SVN->getValueType(0); - unsigned NumElts = VT.getVectorNumElements(); - - bool Changed = false; - SmallVector<int, 8> NewMask; - for (unsigned i = 0; i != NumElts; ++i) { - int Idx = SVN->getMaskElt(i); - if ((0 <= Idx && Idx < (int)NumElts && isUndefElt(N0, Idx)) || - ((int)NumElts < Idx && isUndefElt(N1, Idx - NumElts))) { - Changed = true; - Idx = -1; - } - NewMask.push_back(Idx); - } - if (Changed) - return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask); + if (SimplifyDemandedVectorElts(SDValue(N, 0))) + return SDValue(N, 0); return SDValue(); } @@ -16028,10 +16812,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask); } - // Simplify shuffle mask if a referenced element is UNDEF. - if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG)) - return V; - if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG)) return InsElt; @@ -16092,11 +16872,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } - // There are various patterns used to build up a vector from smaller vectors, - // subvectors, or elements. Scan chains of these and replace unused insertions - // or components with undef. - if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG)) - return S; + // Simplify source operands based on shuffle mask. + if (SimplifyDemandedVectorElts(SDValue(N, 0))) + return SDValue(N, 0); // Match shuffles that can be converted to any_vector_extend_in_reg. if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes)) @@ -16422,10 +17200,11 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) { SDValue CN0 = N0.getOperand(0); SDValue CN1 = N1.getOperand(0); - if (CN0.getValueType().getVectorElementType() == - CN1.getValueType().getVectorElementType() && - CN0.getValueType().getVectorNumElements() == - VT.getVectorNumElements()) { + EVT CN0VT = CN0.getValueType(); + EVT CN1VT = CN1.getValueType(); + if (CN0VT.isVector() && CN1VT.isVector() && + CN0VT.getVectorElementType() == CN1VT.getVectorElementType() && + CN0VT.getVectorNumElements() == VT.getVectorNumElements()) { SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), CN0.getValueType(), CN0, CN1, N2); return DAG.getBitcast(VT, NewINSERT); @@ -16680,14 +17459,14 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, const ConstantFPSDNode *Zero = nullptr; if (TheSelect->getOpcode() == ISD::SELECT_CC) { - CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get(); + CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get(); CmpLHS = TheSelect->getOperand(0); Zero = isConstOrConstSplatFP(TheSelect->getOperand(1)); } else { // SELECT or VSELECT SDValue Cmp = TheSelect->getOperand(0); if (Cmp.getOpcode() == ISD::SETCC) { - CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get(); + CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get(); CmpLHS = Cmp.getOperand(0); Zero = isConstOrConstSplatFP(Cmp.getOperand(1)); } @@ -16905,24 +17684,6 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, return !SCCC->isNullValue() ? 
N2 : N3; } - // Check to see if we can simplify the select into an fabs node - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) { - // Allow either -0.0 or 0.0 - if (CFP->isZero()) { - // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs - if ((CC == ISD::SETGE || CC == ISD::SETGT) && - N0 == N2 && N3.getOpcode() == ISD::FNEG && - N2 == N3.getOperand(0)) - return DAG.getNode(ISD::FABS, DL, VT, N0); - - // select (setl[te] X, +/-0.0), fneg(X), X -> fabs - if ((CC == ISD::SETLT || CC == ISD::SETLE) && - N0 == N3 && N2.getOpcode() == ISD::FNEG && - N2.getOperand(0) == N3) - return DAG.getNode(ISD::FABS, DL, VT, N3); - } - } - // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 // in it. This is a win when the constant is not otherwise available because @@ -17400,19 +18161,34 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); if (!Reciprocal) { - // Unfortunately, Est is now NaN if the input was exactly 0.0. - // Select out this case and force the answer to 0.0. + // The estimate is now completely wrong if the input was exactly 0.0 or + // possibly a denormal. Force the answer to 0.0 for those cases. EVT VT = Op.getValueType(); SDLoc DL(Op); - - SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); EVT CCVT = getSetCCResultType(VT); - SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); - AddToWorklist(ZeroCmp.getNode()); - - Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, - ZeroCmp, FPZero, Est); - AddToWorklist(Est.getNode()); + ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; + const Function &F = DAG.getMachineFunction().getFunction(); + Attribute Denorms = F.getFnAttribute("denormal-fp-math"); + if (Denorms.getValueAsString().equals("ieee")) { + // fabs(X) < SmallestNormal ? 0.0 : Est + const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); + APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); + SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT); + SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); + SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); + SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); + Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est); + AddToWorklist(Fabs.getNode()); + AddToWorklist(IsDenorm.getNode()); + AddToWorklist(Est.getNode()); + } else { + // X == 0.0 ? 
0.0 : Est + SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); + SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); + Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est); + AddToWorklist(IsZero.getNode()); + AddToWorklist(Est.getNode()); + } } } return Est; @@ -17715,7 +18491,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { Index = nullptr; break; } - } // end while + }// end while } // At this point, ChainedStores lists all of the Store nodes diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 3c856914053b..e4a9d557d386 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -61,7 +61,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -99,6 +98,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -113,6 +113,11 @@ using namespace llvm; #define DEBUG_TYPE "isel" +// FIXME: Remove this after the feature has proven reliable. +static cl::opt<bool> SinkLocalValues("fast-isel-sink-local-values", + cl::init(true), cl::Hidden, + cl::desc("Sink local values in FastISel")); + STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " "target-independent selector"); STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " @@ -120,9 +125,10 @@ STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); /// Set the current block to which generated machine instructions will be -/// appended, and clear the local CSE map. +/// appended. void FastISel::startNewBlock() { - LocalValueMap.clear(); + assert(LocalValueMap.empty() && + "local values should be cleared after finishing a BB"); // Instructions are appended to FuncInfo.MBB. If the basic block already // contains labels or copies, use the last instruction as the last local @@ -133,6 +139,9 @@ void FastISel::startNewBlock() { LastLocalValue = EmitStartPt; } +/// Flush the local CSE map and sink anything we can. +void FastISel::finishBasicBlock() { flushLocalValueMap(); } + bool FastISel::lowerArguments() { if (!FuncInfo.CanLowerReturn) // Fallback to SDISel argument lowering code to deal with sret pointer @@ -153,11 +162,168 @@ bool FastISel::lowerArguments() { return true; } +/// Return the defined register if this instruction defines exactly one +/// virtual register and uses no other virtual registers. Otherwise return 0. +static unsigned findSinkableLocalRegDef(MachineInstr &MI) { + unsigned RegDef = 0; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + if (MO.isDef()) { + if (RegDef) + return 0; + RegDef = MO.getReg(); + } else if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + // This is another use of a vreg. Don't try to sink it. + return 0; + } + } + return RegDef; +} + void FastISel::flushLocalValueMap() { + // Try to sink local values down to their first use so that we can give them a + // better debug location. 
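One aside before continuing with the FastISel sinking logic: the buildSqrtEstimateImpl hunk a little above replaces the old "input == 0.0" select with a denormal-aware guard, because a reciprocal-square-root estimate refined by Newton-Raphson is garbage for zero and, when denormals are honored, for denormal inputs too. A scalar reference sketch of the guarded result under the "denormal-fp-math"="ieee" assumption (the smallest-normal threshold is the point of the change; the estimate refinement shown is a generic textbook step, not the target's sequence):

#include <cmath>
#include <limits>

// Reference behavior of the guarded sqrt(x) built from a reciprocal square
// root approximation: any input smaller in magnitude than the smallest
// normal yields 0.0 instead of a bogus refined estimate.
float guardedSqrtFromEstimate(float X, float RsqrtEstimate) {
  // One Newton-Raphson step refining rsqrt, then multiply by X to get sqrt.
  float Refined =
      RsqrtEstimate * (1.5f - 0.5f * X * RsqrtEstimate * RsqrtEstimate);
  float Est = X * Refined;
  // fabs(X) < SmallestNormal ? 0.0 : Est   (the "ieee" denormal mode path)
  return std::fabs(X) < std::numeric_limits<float>::min() ? 0.0f : Est;
}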
This has the side effect of shrinking local value + // live ranges, which helps out fast regalloc. + if (SinkLocalValues && LastLocalValue != EmitStartPt) { + // Sink local value materialization instructions between EmitStartPt and + // LastLocalValue. Visit them bottom-up, starting from LastLocalValue, to + // avoid inserting into the range that we're iterating over. + MachineBasicBlock::reverse_iterator RE = + EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt) + : FuncInfo.MBB->rend(); + MachineBasicBlock::reverse_iterator RI(LastLocalValue); + + InstOrderMap OrderMap; + for (; RI != RE;) { + MachineInstr &LocalMI = *RI; + ++RI; + bool Store = true; + if (!LocalMI.isSafeToMove(nullptr, Store)) + continue; + unsigned DefReg = findSinkableLocalRegDef(LocalMI); + if (DefReg == 0) + continue; + + sinkLocalValueMaterialization(LocalMI, DefReg, OrderMap); + } + } + LocalValueMap.clear(); LastLocalValue = EmitStartPt; recomputeInsertPt(); SavedInsertPt = FuncInfo.InsertPt; + LastFlushPoint = FuncInfo.InsertPt; +} + +static bool isRegUsedByPhiNodes(unsigned DefReg, + FunctionLoweringInfo &FuncInfo) { + for (auto &P : FuncInfo.PHINodesToUpdate) + if (P.second == DefReg) + return true; + return false; +} + +/// Build a map of instruction orders. Return the first terminator and its +/// order. Consider EH_LABEL instructions to be terminators as well, since local +/// values for phis after invokes must be materialized before the call. +void FastISel::InstOrderMap::initialize( + MachineBasicBlock *MBB, MachineBasicBlock::iterator LastFlushPoint) { + unsigned Order = 0; + for (MachineInstr &I : *MBB) { + if (!FirstTerminator && + (I.isTerminator() || (I.isEHLabel() && &I != &MBB->front()))) { + FirstTerminator = &I; + FirstTerminatorOrder = Order; + } + Orders[&I] = Order++; + + // We don't need to order instructions past the last flush point. + if (I.getIterator() == LastFlushPoint) + break; + } +} + +void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI, + unsigned DefReg, + InstOrderMap &OrderMap) { + // If this register is used by a register fixup, MRI will not contain all + // the uses until after register fixups, so don't attempt to sink or DCE + // this instruction. Register fixups typically come from no-op cast + // instructions, which replace the cast instruction vreg with the local + // value vreg. + if (FuncInfo.RegsWithFixups.count(DefReg)) + return; + + // We can DCE this instruction if there are no uses and it wasn't a + // materialized for a successor PHI node. + bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo); + if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) { + if (EmitStartPt == &LocalMI) + EmitStartPt = EmitStartPt->getPrevNode(); + LLVM_DEBUG(dbgs() << "removing dead local value materialization " + << LocalMI); + OrderMap.Orders.erase(&LocalMI); + LocalMI.eraseFromParent(); + return; + } + + // Number the instructions if we haven't yet so we can efficiently find the + // earliest use. + if (OrderMap.Orders.empty()) + OrderMap.initialize(FuncInfo.MBB, LastFlushPoint); + + // Find the first user in the BB. 
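  // (Illustrative aside, not part of this change: the net effect of the sinking
  //  implemented here is to turn
  //        %c = <materialize constant>      ; at the top of the block
  //        ...                              ; unrelated instructions
  //        first use of %c
  //  into
  //        ...                              ; unrelated instructions
  //        %c = <materialize constant>      ; now adjacent to its first use
  //        first use of %c
  //  which shrinks the live range seen by the fast register allocator and lets
  //  the materialization adopt the debug location of its first use. The search
  //  below locates that first use by taking the minimum order number assigned
  //  in InstOrderMap::initialize.)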
+ MachineInstr *FirstUser = nullptr; + unsigned FirstOrder = std::numeric_limits<unsigned>::max(); + for (MachineInstr &UseInst : MRI.use_nodbg_instructions(DefReg)) { + auto I = OrderMap.Orders.find(&UseInst); + assert(I != OrderMap.Orders.end() && + "local value used by instruction outside local region"); + unsigned UseOrder = I->second; + if (UseOrder < FirstOrder) { + FirstOrder = UseOrder; + FirstUser = &UseInst; + } + } + + // The insertion point will be the first terminator or the first user, + // whichever came first. If there was no terminator, this must be a + // fallthrough block and the insertion point is the end of the block. + MachineBasicBlock::instr_iterator SinkPos; + if (UsedByPHI && OrderMap.FirstTerminatorOrder < FirstOrder) { + FirstOrder = OrderMap.FirstTerminatorOrder; + SinkPos = OrderMap.FirstTerminator->getIterator(); + } else if (FirstUser) { + SinkPos = FirstUser->getIterator(); + } else { + assert(UsedByPHI && "must be users if not used by a phi"); + SinkPos = FuncInfo.MBB->instr_end(); + } + + // Collect all DBG_VALUEs before the new insertion position so that we can + // sink them. + SmallVector<MachineInstr *, 1> DbgValues; + for (MachineInstr &DbgVal : MRI.use_instructions(DefReg)) { + if (!DbgVal.isDebugValue()) + continue; + unsigned UseOrder = OrderMap.Orders[&DbgVal]; + if (UseOrder < FirstOrder) + DbgValues.push_back(&DbgVal); + } + + // Sink LocalMI before SinkPos and assign it the same DebugLoc. + LLVM_DEBUG(dbgs() << "sinking local value to first use " << LocalMI); + FuncInfo.MBB->remove(&LocalMI); + FuncInfo.MBB->insert(SinkPos, &LocalMI); + if (SinkPos != FuncInfo.MBB->end()) + LocalMI.setDebugLoc(SinkPos->getDebugLoc()); + + // Sink any debug values that we've collected. + for (MachineInstr *DI : DbgValues) { + FuncInfo.MBB->remove(DI); + FuncInfo.MBB->insert(SinkPos, DI); + } } bool FastISel::hasTrivialKill(const Value *V) { @@ -328,8 +494,10 @@ void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { AssignedReg = Reg; else if (Reg != AssignedReg) { // Arrange for uses of AssignedReg to be replaced by uses of Reg. - for (unsigned i = 0; i < NumRegs; i++) + for (unsigned i = 0; i < NumRegs; i++) { FuncInfo.RegFixups[AssignedReg + i] = Reg + i; + FuncInfo.RegsWithFixups.insert(Reg + i); + } AssignedReg = Reg; } @@ -681,7 +849,7 @@ bool FastISel::selectStackmap(const CallInst *I) { return true; } -/// \brief Lower an argument list according to the target calling convention. +/// Lower an argument list according to the target calling convention. /// /// This is a helper for lowering intrinsics that follow a target calling /// convention or require stack pointer adjustment. Only a subset of the @@ -702,7 +870,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx, ArgListEntry Entry; Entry.Val = V; Entry.Ty = V->getType(); - Entry.setAttributes(&CS, ArgIdx); + Entry.setAttributes(&CS, ArgI); Args.push_back(Entry); } @@ -874,10 +1042,31 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) { TII.get(TargetOpcode::PATCHABLE_EVENT_CALL)); for (auto &MO : Ops) MIB.add(MO); + // Insert the Patchable Event Call instruction, that gets lowered properly. return true; } +bool FastISel::selectXRayTypedEvent(const CallInst *I) { + const auto &Triple = TM.getTargetTriple(); + if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) + return true; // don't do anything to this instruction. 
+ SmallVector<MachineOperand, 8> Ops; + Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)), + /*IsDef=*/false)); + Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)), + /*IsDef=*/false)); + Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)), + /*IsDef=*/false)); + MachineInstrBuilder MIB = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL)); + for (auto &MO : Ops) + MIB.add(MO); + + // Insert the Patchable Typed Event Call instruction, that gets lowered properly. + return true; +} /// Returns an AttributeList representing the attributes applied to the return /// value of the given call. @@ -1141,13 +1330,13 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); assert(DI->getVariable() && "Missing variable"); if (!FuncInfo.MF->getMMI().hasDebugInfo()) { - DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; } const Value *Address = DI->getAddress(); if (!Address || isa<UndefValue>(Address)) { - DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; } @@ -1182,24 +1371,15 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { if (Op) { assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && "Expected inlined-at fields to agree"); - if (Op->isReg()) { - Op->setIsDebug(true); - // A dbg.declare describes the address of a source variable, so lower it - // into an indirect DBG_VALUE. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, - Op->getReg(), DI->getVariable(), DI->getExpression()); - } else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::DBG_VALUE)) - .add(*Op) - .addImm(0) - .addMetadata(DI->getVariable()) - .addMetadata(DI->getExpression()); + // A dbg.declare describes the address of a source variable, so lower it + // into an indirect DBG_VALUE. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, + *Op, DI->getVariable(), DI->getExpression()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. - DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); } return true; } @@ -1242,7 +1422,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. 
- DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); } return true; } @@ -1256,7 +1436,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { updateValueMap(II, ResultReg); return true; } - case Intrinsic::invariant_group_barrier: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: case Intrinsic::expect: { unsigned ResultReg = getRegForValue(II->getArgOperand(0)); if (!ResultReg) @@ -1272,6 +1453,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::xray_customevent: return selectXRayCustomEvent(II); + case Intrinsic::xray_typedevent: + return selectXRayTypedEvent(II); } return fastLowerIntrinsicCall(II); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 81347fa4bd46..42c7181dac41 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/WasmEHFuncInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -118,6 +119,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } } } + if (Personality == EHPersonality::Wasm_CXX) { + WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo(); + calculateWasmEHInfo(&fn, EHInfo); + } // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines @@ -226,9 +231,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, const Instruction *PadInst = BB.getFirstNonPHI(); // If this is a non-landingpad EH pad, mark this function as using // funclets. - // FIXME: SEH catchpads do not create funclets, so we could avoid setting - // this in such cases in order to improve frame layout. + // FIXME: SEH catchpads do not create EH scope/funclets, so we could avoid + // setting this in such cases in order to improve frame layout. if (!isa<LandingPadInst>(PadInst)) { + MF->setHasEHScopes(true); MF->setHasEHFunclets(true); MF->getFrameInfo().setHasOpaqueSPAdjustment(true); } @@ -281,28 +287,46 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } } - if (!isFuncletEHPersonality(Personality)) - return; - - WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo(); + if (isFuncletEHPersonality(Personality)) { + WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo(); - // Map all BB references in the WinEH data to MBBs. - for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { - for (WinEHHandlerType &H : TBME.HandlerArray) { - if (H.Handler) - H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()]; + // Map all BB references in the WinEH data to MBBs. 
+ for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { + for (WinEHHandlerType &H : TBME.HandlerArray) { + if (H.Handler) + H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()]; + } + } + for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap) + if (UME.Cleanup) + UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()]; + for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) { + const auto *BB = UME.Handler.get<const BasicBlock *>(); + UME.Handler = MBBMap[BB]; + } + for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) { + const auto *BB = CME.Handler.get<const BasicBlock *>(); + CME.Handler = MBBMap[BB]; } } - for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap) - if (UME.Cleanup) - UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()]; - for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) { - const BasicBlock *BB = UME.Handler.get<const BasicBlock *>(); - UME.Handler = MBBMap[BB]; - } - for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) { - const BasicBlock *BB = CME.Handler.get<const BasicBlock *>(); - CME.Handler = MBBMap[BB]; + + else if (Personality == EHPersonality::Wasm_CXX) { + WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo(); + // Map all BB references in the WinEH data to MBBs. + DenseMap<BBOrMBB, BBOrMBB> NewMap; + for (auto &KV : EHInfo.EHPadUnwindMap) { + const auto *Src = KV.first.get<const BasicBlock *>(); + const auto *Dst = KV.second.get<const BasicBlock *>(); + NewMap[MBBMap[Src]] = MBBMap[Dst]; + } + EHInfo.EHPadUnwindMap = std::move(NewMap); + NewMap.clear(); + for (auto &KV : EHInfo.ThrowUnwindMap) { + const auto *Src = KV.first.get<const BasicBlock *>(); + const auto *Dst = KV.second.get<const BasicBlock *>(); + NewMap[MBBMap[Src]] = MBBMap[Dst]; + } + EHInfo.ThrowUnwindMap = std::move(NewMap); } } @@ -312,12 +336,14 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, void FunctionLoweringInfo::clear() { MBBMap.clear(); ValueMap.clear(); + VirtReg2Value.clear(); StaticAllocaMap.clear(); LiveOutRegInfo.clear(); VisitedBBs.clear(); ArgDbgValues.clear(); ByValArgFrameIndexMap.clear(); RegFixups.clear(); + RegsWithFixups.clear(); StatepointStackSlots.clear(); StatepointSpillMaps.clear(); PreferredExtendType.clear(); @@ -483,7 +509,7 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { auto I = ByValArgFrameIndexMap.find(A); if (I != ByValArgFrameIndexMap.end()) return I->second; - DEBUG(dbgs() << "Argument does not have assigned frame index!\n"); + LLVM_DEBUG(dbgs() << "Argument does not have assigned frame index!\n"); return INT_MAX; } @@ -547,3 +573,13 @@ FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const } return std::make_pair(It->second, false); } + +const Value * +FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) { + if (VirtReg2Value.empty()) { + for (auto &P : ValueMap) { + VirtReg2Value[P.second] = P.first; + } + } + return VirtReg2Value[Vreg]; +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index cc9b41b4b487..d6171f3177d7 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -394,11 +394,26 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) { MIB.addFPImm(F->getConstantFPValue()); } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { + unsigned VReg = R->getReg(); + MVT OpVT = Op.getSimpleValueType(); + const TargetRegisterClass 
*OpRC = + TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr; + const TargetRegisterClass *IIRC = + II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF)) + : nullptr; + + if (OpRC && IIRC && OpRC != IIRC && + TargetRegisterInfo::isVirtualRegister(VReg)) { + unsigned NewVReg = MRI->createVirtualRegister(IIRC); + BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); + VReg = NewVReg; + } // Turn additional physreg operands into implicit uses on non-variadic // instructions. This is used by call and return instructions passing // arguments in registers. bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic()); - MIB.addReg(R->getReg(), getImplRegState(Imp)); + MIB.addReg(VReg, getImplRegState(Imp)); } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) { MIB.addRegMask(RM->getRegMask()); } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { @@ -682,11 +697,15 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. // EmitTargetCodeForFrameDebugValue is responsible for allocation. - return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) - .addFrameIndex(SD->getFrameIx()) - .addImm(0) - .addMetadata(Var) - .addMetadata(Expr); + auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(SD->getFrameIx()); + if (SD->isIndirect()) + // Push [fi + 0] onto the DIExpression stack. + FrameMI.addImm(0); + else + // Push fi onto the DIExpression stack. + FrameMI.addReg(0); + return FrameMI.addMetadata(Var).addMetadata(Expr); } // Otherwise, we're going to create an instruction here. const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); @@ -705,6 +724,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, else AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false); + } else if (SD->getKind() == SDDbgValue::VREG) { + MIB.addReg(SD->getVReg(), RegState::Debug); } else if (SD->getKind() == SDDbgValue::CONST) { const Value *V = SD->getConst(); if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { @@ -736,6 +757,20 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, return &*MIB; } +MachineInstr * +InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) { + MDNode *Label = SD->getLabel(); + DebugLoc DL = SD->getDebugLoc(); + assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + + const MCInstrDesc &II = TII->get(TargetOpcode::DBG_LABEL); + MachineInstrBuilder MIB = BuildMI(*MF, DL, II); + MIB.addMetadata(Label); + + return &*MIB; +} + /// EmitMachineNode - Generate machine code for a target-specific node and /// needed dependencies. /// @@ -807,9 +842,34 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Add result register values for things that are defined by this // instruction. 
- if (NumResults) + if (NumResults) { CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap); + // Transfer any IR flags from the SDNode to the MachineInstr + MachineInstr *MI = MIB.getInstr(); + const SDNodeFlags Flags = Node->getFlags(); + if (Flags.hasNoSignedZeros()) + MI->setFlag(MachineInstr::MIFlag::FmNsz); + + if (Flags.hasAllowReciprocal()) + MI->setFlag(MachineInstr::MIFlag::FmArcp); + + if (Flags.hasNoNaNs()) + MI->setFlag(MachineInstr::MIFlag::FmNoNans); + + if (Flags.hasNoInfs()) + MI->setFlag(MachineInstr::MIFlag::FmNoInfs); + + if (Flags.hasAllowContract()) + MI->setFlag(MachineInstr::MIFlag::FmContract); + + if (Flags.hasApproximateFuncs()) + MI->setFlag(MachineInstr::MIFlag::FmAfn); + + if (Flags.hasAllowReassociation()) + MI->setFlag(MachineInstr::MIFlag::FmReassoc); + } + // Emit all of the actual operands of this instruction, adding them to the // instruction as appropriate. bool HasOptPRefs = NumDefs > NumResults; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index 8a8a1bbd18f7..701b6368690b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -113,6 +113,9 @@ public: MachineInstr *EmitDbgValue(SDDbgValue *SD, DenseMap<SDValue, unsigned> &VRBaseMap); + /// Generate machine instruction for a dbg_label node. + MachineInstr *EmitDbgLabel(SDDbgLabel *SD); + /// EmitNode - Generate machine code for a node and needed dependencies. /// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 3a2fb0c0a836..2b7ba1ffb309 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -22,7 +22,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -41,6 +40,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -87,11 +87,11 @@ class SelectionDAGLegalize { const TargetLowering &TLI; SelectionDAG &DAG; - /// \brief The set of nodes which have already been legalized. We hold a + /// The set of nodes which have already been legalized. We hold a /// reference to it in order to update as necessary on node deletion. SmallPtrSetImpl<SDNode *> &LegalizedNodes; - /// \brief A set of all the nodes updated during legalization. + /// A set of all the nodes updated during legalization. SmallSetVector<SDNode *, 16> *UpdatedNodes; EVT getSetCCResultType(EVT VT) const { @@ -107,7 +107,7 @@ public: : TM(DAG.getTarget()), TLI(DAG.getTargetLoweringInfo()), DAG(DAG), LegalizedNodes(LegalizedNodes), UpdatedNodes(UpdatedNodes) {} - /// \brief Legalizes the given operation. + /// Legalizes the given operation. 
void LegalizeOp(SDNode *Node); private: @@ -167,7 +167,7 @@ private: SDValue NewIntValue) const; SDValue ExpandFCOPYSIGN(SDNode *Node) const; SDValue ExpandFABS(SDNode *Node) const; - SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT, + SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, const SDLoc &dl); SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, const SDLoc &dl); @@ -200,8 +200,8 @@ public: } void ReplaceNode(SDNode *Old, SDNode *New) { - DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); - dbgs() << " with: "; New->dump(&DAG)); + LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); assert(Old->getNumValues() == New->getNumValues() && "Replacing one node with another that produces a different number " @@ -213,8 +213,8 @@ public: } void ReplaceNode(SDValue Old, SDValue New) { - DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); - dbgs() << " with: "; New->dump(&DAG)); + LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); DAG.ReplaceAllUsesWith(Old, New); if (UpdatedNodes) @@ -223,13 +223,12 @@ public: } void ReplaceNode(SDNode *Old, const SDValue *New) { - DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG)); + LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG)); DAG.ReplaceAllUsesWith(Old, New); for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { - DEBUG(dbgs() << (i == 0 ? " with: " - : " and: "); - New[i]->dump(&DAG)); + LLVM_DEBUG(dbgs() << (i == 0 ? " with: " : " and: "); + New[i]->dump(&DAG)); if (UpdatedNodes) UpdatedNodes->insert(New[i].getNode()); } @@ -408,7 +407,7 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, } SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { - DEBUG(dbgs() << "Optimizing float store operations\n"); + LLVM_DEBUG(dbgs() << "Optimizing float store operations\n"); // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' // FIXME: We shouldn't do this for TargetConstantFP's. // FIXME: move this to the DAG Combiner! 
Note that we can't regress due @@ -477,7 +476,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { AAMDNodes AAInfo = ST->getAAInfo(); if (!ST->isTruncatingStore()) { - DEBUG(dbgs() << "Legalizing store operation\n"); + LLVM_DEBUG(dbgs() << "Legalizing store operation\n"); if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { ReplaceNode(ST, OptStore); return; @@ -495,15 +494,15 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { unsigned Align = ST->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { - DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); + LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); } else - DEBUG(dbgs() << "Legal store\n"); + LLVM_DEBUG(dbgs() << "Legal store\n"); break; } case TargetLowering::Custom: { - DEBUG(dbgs() << "Trying custom lowering\n"); + LLVM_DEBUG(dbgs() << "Trying custom lowering\n"); SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Res && Res != SDValue(Node, 0)) ReplaceNode(SDValue(Node, 0), Res); @@ -524,7 +523,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { return; } - DEBUG(dbgs() << "Legalizing truncating store operations\n"); + LLVM_DEBUG(dbgs() << "Legalizing truncating store operations\n"); SDValue Value = ST->getValue(); EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); @@ -656,7 +655,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { - DEBUG(dbgs() << "Legalizing non-extending load operation\n"); + LLVM_DEBUG(dbgs() << "Legalizing non-extending load operation\n"); MVT VT = Node->getSimpleValueType(0); SDValue RVal = SDValue(Node, 0); SDValue RChain = SDValue(Node, 1); @@ -706,7 +705,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { return; } - DEBUG(dbgs() << "Legalizing extending load operation\n"); + LLVM_DEBUG(dbgs() << "Legalizing extending load operation\n"); EVT SrcVT = LD->getMemoryVT(); unsigned SrcWidth = SrcVT.getSizeInBits(); unsigned Alignment = LD->getAlignment(); @@ -947,39 +946,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { } } -static TargetLowering::LegalizeAction -getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) { - unsigned EqOpc; - switch (Opcode) { - default: llvm_unreachable("Unexpected FP pseudo-opcode"); - case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break; - case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break; - case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break; - case ISD::STRICT_FMA: EqOpc = ISD::FMA; break; - case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break; - case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break; - case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break; - case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break; - case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break; - case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break; - case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; - case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; - case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; - } - - auto Action = TLI.getOperationAction(EqOpc, VT); - - // We don't currently handle Custom or Promote for strict FP pseudo-ops. - // For now, we just expand for those cases. 
- if (Action != TargetLowering::Legal) - Action = TargetLowering::Expand; - - return Action; -} - /// Return a legal replacement for the given operation, with all legal operands. void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { - DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); // Allow illegal target nodes and illegal registers. if (Node->getOpcode() == ISD::TargetConstant || @@ -1043,8 +1012,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::SETCC: case ISD::BR_CC: { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : - Node->getOpcode() == ISD::SETCC ? 2 : - Node->getOpcode() == ISD::SETCCE ? 3 : 1; + Node->getOpcode() == ISD::SETCC ? 2 : 1; unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = @@ -1122,6 +1090,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; } break; + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: case ISD::STRICT_FSQRT: case ISD::STRICT_FMA: case ISD::STRICT_FPOW: @@ -1139,8 +1111,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT // is also legal, but if ISD::FSQRT requires expansion then so does // ISD::STRICT_FSQRT. - Action = getStrictFPOpcodeAction(TLI, Node->getOpcode(), - Node->getValueType(0)); + Action = TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)); break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { @@ -1202,10 +1174,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } switch (Action) { case TargetLowering::Legal: - DEBUG(dbgs() << "Legal node: nothing to do\n"); + LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n"); return; case TargetLowering::Custom: - DEBUG(dbgs() << "Trying custom legalization\n"); + LLVM_DEBUG(dbgs() << "Trying custom legalization\n"); // FIXME: The handling for custom lowering with multiple results is // a complete mess. if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { @@ -1213,7 +1185,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; if (Node->getNumValues() == 1) { - DEBUG(dbgs() << "Successfully custom legalized node\n"); + LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); // We can just directly replace this node with the lowered value. 
ReplaceNode(SDValue(Node, 0), Res); return; @@ -1222,11 +1194,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { SmallVector<SDValue, 8> ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) ResultVals.push_back(Res.getValue(i)); - DEBUG(dbgs() << "Successfully custom legalized node\n"); + LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); ReplaceNode(Node, ResultVals.data()); return; } - DEBUG(dbgs() << "Could not custom legalize node\n"); + LLVM_DEBUG(dbgs() << "Could not custom legalize node\n"); LLVM_FALLTHROUGH; case TargetLowering::Expand: if (ExpandNode(Node)) @@ -1623,6 +1595,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); NeedInvert = false; + bool NeedSwap = false; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: @@ -1630,23 +1603,37 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, break; case TargetLowering::Expand: { ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode); - if (TLI.isCondCodeLegal(InvCC, OpVT)) { + if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { std::swap(LHS, RHS); CC = DAG.getCondCode(InvCC); return true; } + // Swapping operands didn't work. Try inverting the condition. + InvCC = getSetCCInverse(CCCode, OpVT.isInteger()); + if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { + // If inverting the condition is not enough, try swapping operands + // on top of it. + InvCC = ISD::getSetCCSwappedOperands(InvCC); + NeedSwap = true; + } + if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { + CC = DAG.getCondCode(InvCC); + NeedInvert = true; + if (NeedSwap) + std::swap(LHS, RHS); + return true; + } + ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { default: llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETO: - assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT) - == TargetLowering::Legal + assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) && "If SETO is expanded, SETOEQ must be legal!"); CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break; case ISD::SETUO: - assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT) - == TargetLowering::Legal + assert(TLI.isCondCodeLegal(ISD::SETUNE, OpVT) && "If SETUO is expanded, SETUNE must be legal!"); CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break; case ISD::SETOEQ: @@ -1676,20 +1663,10 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, case ISD::SETGT: case ISD::SETGE: case ISD::SETLT: - // We only support using the inverted operation, which is computed above - // and not a different manner of supporting expanding these cases. - llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETNE: case ISD::SETEQ: - // Try inverting the result of the inverse condition. - InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ; - if (TLI.isCondCodeLegal(InvCC, OpVT)) { - CC = DAG.getCondCode(InvCC); - NeedInvert = true; - return true; - } - // If inverting the condition didn't work then we have no means to expand - // the condition. + // If all combinations of inverting the condition and swapping operands + // didn't work then we have no means to expand the condition. 
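      // (Illustrative aside, not part of this change: the fallbacks attempted
      //  above are, in order, the swapped condition, the inverted condition,
      //  and the inverted-and-swapped condition. For a hypothetical target
      //  whose only legal integer compare is SETLT:
      //    x >  y   becomes   y <  x              (swap operands)
      //    x >= y   becomes   !(x <  y)           (invert, NeedInvert = true)
      //    x <= y   becomes   !(y <  x)           (invert and swap)
      //  Only a condition for which none of these rewrites yields a legal or
      //  custom compare falls through to this point.)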
llvm_unreachable("Don't know how to expand this condition!"); } @@ -2036,12 +2013,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); if (!CallInfo.second.getNode()) { - DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump()); + LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump()); // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); } - DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump()); + LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump()); return CallInfo.first; } @@ -2327,10 +2304,10 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, const SDLoc &dl) { // TODO: Should any fast-math-flags be set for the created nodes? - DEBUG(dbgs() << "Legalizing INT_TO_FP\n"); + LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n"); if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { - DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double " - "expansion\n"); + LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double " + "expansion\n"); // Get the stack frame index of a 8 byte buffer. SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); @@ -2395,7 +2372,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, // and in all alternate rounding modes. // TODO: Generalize this for use with other types. if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) { - DEBUG(dbgs() << "Converting unsigned i64 to f64\n"); + LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f64\n"); SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64); SDValue TwoP84PlusTwoP52 = @@ -2418,7 +2395,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, // TODO: Generalize this for use with other types. if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { - DEBUG(dbgs() << "Converting unsigned i64 to f32\n"); + LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f32\n"); // For unsigned conversions, convert them to signed conversions using the // algorithm from the x86_64 __floatundidf in compiler_rt. if (!isSigned) { @@ -2853,7 +2830,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, } bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { - DEBUG(dbgs() << "Trying to expand node\n"); + LLVM_DEBUG(dbgs() << "Trying to expand node\n"); SmallVector<SDValue, 8> Results; SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; @@ -3311,7 +3288,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } break; case ISD::FP_TO_FP16: - DEBUG(dbgs() << "Legalizing FP_TO_FP16\n"); + LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n"); if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { SDValue Op = Node->getOperand(0); MVT SVT = Op.getSimpleValueType(); @@ -3525,15 +3502,25 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::USUBO: { SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); - SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ? - ISD::ADD : ISD::SUB, dl, LHS.getValueType(), - LHS, RHS); + bool IsAdd = Node->getOpcode() == ISD::UADDO; + // If ADD/SUBCARRY is legal, use that instead. + unsigned OpcCarry = IsAdd ? 
ISD::ADDCARRY : ISD::SUBCARRY; + if (TLI.isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) { + SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1)); + SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(), + { LHS, RHS, CarryIn }); + Results.push_back(SDValue(NodeCarry.getNode(), 0)); + Results.push_back(SDValue(NodeCarry.getNode(), 1)); + break; + } + + SDValue Sum = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl, + LHS.getValueType(), LHS, RHS); Results.push_back(Sum); EVT ResultType = Node->getValueType(1); EVT SetCCType = getSetCCResultType(Node->getValueType(0)); - ISD::CondCode CC - = Node->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT; + ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT; SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC); Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType)); @@ -3684,8 +3671,17 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); - Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, - DAG.getConstant(EntrySize, dl, Index.getValueType())); + // For power-of-two jumptable entry sizes convert multiplication to a shift. + // This transformation needs to be done here since otherwise the MIPS + // backend will end up emitting a three instruction multiply sequence + // instead of a single shift and MSP430 will call a runtime function. + if (llvm::isPowerOf2_32(EntrySize)) + Index = DAG.getNode( + ISD::SHL, dl, Index.getValueType(), Index, + DAG.getConstant(llvm::Log2_32(EntrySize), dl, Index.getValueType())); + else + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, + DAG.getConstant(EntrySize, dl, Index.getValueType())); SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, Table); @@ -3701,7 +3697,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, TLI.getPICJumpTableRelocBase(Table, DAG)); } - Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr); + + Tmp1 = TLI.expandIndirectJTBranch(dl, LD.getValue(1), Addr, DAG); Results.push_back(Tmp1); break; } @@ -3720,7 +3717,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (Tmp2.isUndef() || (Tmp2.getOpcode() == ISD::AND && isa<ConstantSDNode>(Tmp2.getOperand(1)) && - dyn_cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1)) + cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1)) Tmp3 = Tmp2; else Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, @@ -3759,7 +3756,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // illegal; expand it into a SELECT_CC. EVT VT = Node->getValueType(0); int TrueValue; - switch (TLI.getBooleanContents(Tmp1->getValueType(0))) { + switch (TLI.getBooleanContents(Tmp1.getValueType())) { case TargetLowering::ZeroOrOneBooleanContent: case TargetLowering::UndefinedBooleanContent: TrueValue = 1; @@ -3784,7 +3781,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue CC = Node->getOperand(4); ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get(); - if (TLI.isCondCodeLegal(CCOp, Tmp1.getSimpleValueType())) { + if (TLI.isCondCodeLegalOrCustom(CCOp, Tmp1.getSimpleValueType())) { // If the condition code is legal, then we need to expand this // node using SETCC and SELECT. EVT CmpVT = Tmp1.getValueType(); @@ -3805,7 +3802,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // version (or vice versa). 
ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, Tmp1.getValueType().isInteger()); - if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) { + if (TLI.isCondCodeLegalOrCustom(InvCC, Tmp1.getSimpleValueType())) { // Use the new condition code and swap true and false Legalized = true; Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC); @@ -3813,7 +3810,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // If The inverse is not legal, then try to swap the arguments using // the inverse condition code. ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC); - if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) { + if (TLI.isCondCodeLegalOrCustom(SwapInvCC, Tmp1.getSimpleValueType())) { // The swapped inverse condition is legal, so swap true and false, // lhs and rhs. Legalized = true; @@ -3906,6 +3903,46 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { ReplaceNode(SDValue(Node, 0), Result); break; } + case ISD::ROTL: + case ISD::ROTR: { + bool IsLeft = Node->getOpcode() == ISD::ROTL; + SDValue Op0 = Node->getOperand(0), Op1 = Node->getOperand(1); + EVT ResVT = Node->getValueType(0); + EVT OpVT = Op0.getValueType(); + assert(OpVT == ResVT && + "The result and the operand types of rotate should match"); + EVT ShVT = Op1.getValueType(); + SDValue Width = DAG.getConstant(OpVT.getScalarSizeInBits(), dl, ShVT); + + // If a rotate in the other direction is legal, use it. + unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL; + if (TLI.isOperationLegal(RevRot, ResVT)) { + SDValue Sub = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1); + Results.push_back(DAG.getNode(RevRot, dl, ResVT, Op0, Sub)); + break; + } + + // Otherwise, + // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1))) + // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1))) + // + assert(isPowerOf2_32(OpVT.getScalarSizeInBits()) && + "Expecting the type bitwidth to be a power of 2"); + unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL; + unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL; + SDValue Width1 = DAG.getNode(ISD::SUB, dl, ShVT, + Width, DAG.getConstant(1, dl, ShVT)); + SDValue NegOp1 = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1); + SDValue And0 = DAG.getNode(ISD::AND, dl, ShVT, Op1, Width1); + SDValue And1 = DAG.getNode(ISD::AND, dl, ShVT, NegOp1, Width1); + + SDValue Or = DAG.getNode(ISD::OR, dl, ResVT, + DAG.getNode(ShOpc, dl, ResVT, Op0, And0), + DAG.getNode(HsOpc, dl, ResVT, Op0, And1)); + Results.push_back(Or); + break; + } + case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: @@ -3921,19 +3958,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Replace the original node with the legalized result. if (Results.empty()) { - DEBUG(dbgs() << "Cannot expand node\n"); + LLVM_DEBUG(dbgs() << "Cannot expand node\n"); return false; } - DEBUG(dbgs() << "Succesfully expanded node\n"); + LLVM_DEBUG(dbgs() << "Succesfully expanded node\n"); ReplaceNode(Node, Results.data()); return true; } void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { - DEBUG(dbgs() << "Trying to convert node to libcall\n"); + LLVM_DEBUG(dbgs() << "Trying to convert node to libcall\n"); SmallVector<SDValue, 8> Results; SDLoc dl(Node); + // FIXME: Check flags on the node to see if we can use a finite call. 
+ bool CanUseFiniteLibCall = TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath; unsigned Opc = Node->getOpcode(); switch (Opc) { case ISD::ATOMIC_FENCE: { @@ -3962,6 +4001,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_NAND: @@ -4028,33 +4068,68 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { break; case ISD::FLOG: case ISD::STRICT_FLOG: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_F128, - RTLIB::LOG_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32, + RTLIB::LOG_FINITE_F64, + RTLIB::LOG_FINITE_F80, + RTLIB::LOG_FINITE_F128, + RTLIB::LOG_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128)); break; case ISD::FLOG2: case ISD::STRICT_FLOG2: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32, + RTLIB::LOG2_FINITE_F64, + RTLIB::LOG2_FINITE_F80, + RTLIB::LOG2_FINITE_F128, + RTLIB::LOG2_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128)); break; case ISD::FLOG10: case ISD::STRICT_FLOG10: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32, + RTLIB::LOG10_FINITE_F64, + RTLIB::LOG10_FINITE_F80, + RTLIB::LOG10_FINITE_F128, + RTLIB::LOG10_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128)); break; case ISD::FEXP: case ISD::STRICT_FEXP: - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_F128, - RTLIB::EXP_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32, + RTLIB::EXP_FINITE_F64, + RTLIB::EXP_FINITE_F80, + RTLIB::EXP_FINITE_F128, + RTLIB::EXP_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128)); break; case ISD::FEXP2: case ISD::STRICT_FEXP2: - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32, + RTLIB::EXP2_FINITE_F64, + RTLIB::EXP2_FINITE_F80, + RTLIB::EXP2_FINITE_F128, + RTLIB::EXP2_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128)); break; case ISD::FTRUNC: Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, @@ -4100,9 +4175,16 @@ void 
SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { break; case ISD::FPOW: case ISD::STRICT_FPOW: - Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_F128, - RTLIB::POW_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32, + RTLIB::POW_FINITE_F64, + RTLIB::POW_FINITE_F80, + RTLIB::POW_FINITE_F128, + RTLIB::POW_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128)); break; case ISD::FDIV: Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, @@ -4186,10 +4268,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { // Replace the original node with the legalized result. if (!Results.empty()) { - DEBUG(dbgs() << "Successfully converted node to libcall\n"); + LLVM_DEBUG(dbgs() << "Successfully converted node to libcall\n"); ReplaceNode(Node, Results.data()); } else - DEBUG(dbgs() << "Could not convert node to libcall\n"); + LLVM_DEBUG(dbgs() << "Could not convert node to libcall\n"); } // Determine the vector type to use in place of an original scalar element when @@ -4203,7 +4285,7 @@ static MVT getPromotedVectorElementType(const TargetLowering &TLI, } void SelectionDAGLegalize::PromoteNode(SDNode *Node) { - DEBUG(dbgs() << "Trying to promote node\n"); + LLVM_DEBUG(dbgs() << "Trying to promote node\n"); SmallVector<SDValue, 8> Results; MVT OVT = Node->getSimpleValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || @@ -4256,7 +4338,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { ISD::SRL, dl, NVT, Tmp1, DAG.getConstant(DiffBits, dl, TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); - Results.push_back(Tmp1); + + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); break; } case ISD::FP_TO_UINT: @@ -4640,10 +4723,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { // Replace the original node with the legalized result. if (!Results.empty()) { - DEBUG(dbgs() << "Successfully promoted node\n"); + LLVM_DEBUG(dbgs() << "Successfully promoted node\n"); ReplaceNode(Node, Results.data()); } else - DEBUG(dbgs() << "Could not promote node\n"); + LLVM_DEBUG(dbgs() << "Could not promote node\n"); } /// This is the entry point for the file. 
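(As an aside on the ISD::ROTL/ISD::ROTR expansion added to SelectionDAGLegalize::ExpandNode above: when a rotate in the opposite direction is not legal either, the node is rewritten into two masked shifts combined with an OR. A minimal stand-alone C++ sketch of the same identity for a 32-bit value, illustrative only and not LLVM code:

    #include <cstdint>

    // (rotl x, c) -> (x << (c & (w-1))) | (x >> ((w - c) & (w-1))), with w = 32.
    // Masking both shift amounts with w-1 keeps them in [0, 31], so the
    // expansion is well defined even when c == 0 or c is a multiple of 32.
    uint32_t rotl32(uint32_t X, unsigned C) {
      return (X << (C & 31u)) | (X >> ((32u - C) & 31u));
    }

    // The rotr expansion is the mirror image: the shift directions swap.
    uint32_t rotr32(uint32_t X, unsigned C) {
      return (X >> (C & 31u)) | (X << ((32u - C) & 31u));
    }

The identity relies on the bit width being a power of two, which is exactly what the assert in the expansion checks.)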
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index e28a3aa47ca3..b0ae1e0399fb 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -47,8 +47,8 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); + dbgs() << "\n"); SDValue R = SDValue(); switch (N->getOpcode()) { @@ -738,8 +738,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { - DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG); + dbgs() << "\n"); SDValue Res = SDValue(); switch (N->getOpcode()) { @@ -1039,7 +1039,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { /// have invalid operands or may have other results that need promotion, we just /// know that (at least) one result needs expansion. void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n"); SDValue Lo, Hi; Lo = Hi = SDValue(); @@ -1538,7 +1538,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, /// types of the node are known to be legal, but other operands of the node may /// need promotion or expansion as well as the specified one. bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { - DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); // See if the target wants to custom expand this node. @@ -1658,18 +1658,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { EVT RVT = N->getValueType(0); SDLoc dl(N); - // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on - // PPC (the libcall is not available). FIXME: Do this in a less hacky way. - if (RVT == MVT::i32) { - assert(N->getOperand(0).getValueType() == MVT::ppcf128 && - "Logic only correct for ppcf128!"); - SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128, - N->getOperand(0), DAG.getValueType(MVT::f64)); - Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res, - DAG.getIntPtrConstant(1, dl)); - return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); - } - RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first; @@ -1679,31 +1667,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { EVT RVT = N->getValueType(0); SDLoc dl(N); - // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on - // PPC (the libcall is not available). FIXME: Do this in a less hacky way. 
- if (RVT == MVT::i32) { - assert(N->getOperand(0).getValueType() == MVT::ppcf128 && - "Logic only correct for ppcf128!"); - const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; - APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31)); - SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); - // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X - // FIXME: generated code sucks. - // TODO: Are there fast-math-flags to propagate to this FSUB? - return DAG.getSelectCC(dl, N->getOperand(0), Tmp, - DAG.getNode(ISD::ADD, dl, MVT::i32, - DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, - DAG.getNode(ISD::FSUB, dl, - MVT::ppcf128, - N->getOperand(0), - Tmp)), - DAG.getConstant(0x80000000, dl, - MVT::i32)), - DAG.getNode(ISD::FP_TO_SINT, dl, - MVT::i32, N->getOperand(0)), - ISD::SETGE); - } - RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), @@ -2139,13 +2102,12 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) { // Load the value as an integer value with the same number of bits. EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); - auto MMOFlags = - L->getMemOperand()->getFlags() & - ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT, SDLoc(N), L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), IVT, - L->getAlignment(), MMOFlags, L->getAAInfo()); + L->getAlignment(), + L->getMemOperand()->getFlags(), + L->getAAInfo()); // Legalize the chain result by replacing uses of the old value chain with the // new one ReplaceValueWith(SDValue(N, 1), newL.getValue(1)); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 29f0bb475b08..63a1ea13a5f5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -36,12 +36,13 @@ using namespace llvm; /// may also have invalid operands or may have other results that need /// expansion, we just know that (at least) one result needs promotion. void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); + dbgs() << "\n"); SDValue Res = SDValue(); // See if the target wants to custom expand this node. 
if (CustomLowerNode(N, N->getValueType(ResNo), true)) { - DEBUG(dbgs() << "Node has been custom expanded, done\n"); + LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n"); return; } @@ -146,6 +147,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_NAND: @@ -501,7 +503,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { SDLoc dl(N); SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(), - N->getIndex()}; + N->getIndex(), N->getScale() }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), N->getMemoryVT(), dl, Ops, N->getMemOperand()); @@ -586,43 +588,39 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { - EVT SVT = getSetCCResultType(N->getOperand(0).getValueType()); - + EVT InVT = N->getOperand(0).getValueType(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - // Only use the result of getSetCCResultType if it is legal, - // otherwise just use the promoted result type (NVT). - if (!TLI.isTypeLegal(SVT)) - SVT = NVT; + EVT SVT = getSetCCResultType(InVT); + + // If we got back a type that needs to be promoted, this likely means the + // the input type also needs to be promoted. So get the promoted type for + // the input and try the query again. + if (getTypeAction(SVT) == TargetLowering::TypePromoteInteger) { + if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) { + InVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + SVT = getSetCCResultType(InVT); + } else { + // Input type isn't promoted, just use the default promoted type. + SVT = NVT; + } + } SDLoc dl(N); assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() && "Vector compare must return a vector result!"); - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - if (LHS.getValueType() != RHS.getValueType()) { - if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger && - !LHS.getValueType().isVector()) - LHS = GetPromotedInteger(LHS); - if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger && - !RHS.getValueType().isVector()) - RHS = GetPromotedInteger(RHS); - } - // Get the SETCC result using the canonical SETCC type. - SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, LHS, RHS, - N->getOperand(2)); + SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); // Convert to the expected type. return DAG.getSExtOrTrunc(SetCC, dl, NVT); } SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { - SDValue LHS = N->getOperand(0); + SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); - if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger) - LHS = GetPromotedInteger(LHS); if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) RHS = ZExtPromotedInteger(RHS); return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS); @@ -661,22 +659,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); // The input value must be properly sign extended. 
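  // (Illustrative aside, not part of this change: the sign extension matters
  //  because an arithmetic shift of the promoted value only matches the
  //  original narrow shift when the high bits are correct. For i8 promoted to
  //  i32, take X = 0x80 (-128): the i8 result of X >> 1 is -64 (0xC0). If X
  //  were merely zero-extended, 0x00000080 >> 1 would give 0x40, and truncating
  //  back to i8 would yield +64, which is wrong. SExtPromotedInteger restores
  //  the high bits first; PromoteIntRes_SRL below is the dual case and uses
  //  ZExtPromotedInteger instead.)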
- if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger) - LHS = SExtPromotedInteger(LHS); + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = N->getOperand(1); if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) RHS = ZExtPromotedInteger(RHS); return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); // The input value must be properly zero extended. - if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger) - LHS = ZExtPromotedInteger(LHS); + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = N->getOperand(1); if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) RHS = ZExtPromotedInteger(RHS); return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS); @@ -904,11 +898,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { /// result types of the node are known to be legal, but other operands of the /// node may need promotion or expansion as well as the specified one. bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { - DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); + dbgs() << "\n"); SDValue Res = SDValue(); if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) { - DEBUG(dbgs() << "Node has been custom lowered, done\n"); + LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n"); return false; } @@ -1001,11 +996,11 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, // than the width of NewLHS/NewRH, we can avoid inserting real truncate // instruction, which is redudant eventually. unsigned OpLEffectiveBits = - OpL.getValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1; + OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1; unsigned OpREffectiveBits = - OpR.getValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1; - if (OpLEffectiveBits <= NewLHS.getValueSizeInBits() && - OpREffectiveBits <= NewRHS.getValueSizeInBits()) { + OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1; + if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() && + OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) { NewLHS = OpL; NewRHS = OpR; } else { @@ -1356,7 +1351,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) { /// have invalid operands or may have other results that need promotion, we just /// know that (at least) one result needs expansion. void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); + dbgs() << "\n"); SDValue Lo, Hi; Lo = Hi = SDValue(); @@ -1413,6 +1409,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_NAND: @@ -2893,7 +2890,8 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, /// result types of the node are known to be legal, but other operands of the /// node may need promotion or expansion as well as the specified one. 
bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { - DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); + dbgs() << "\n"); SDValue Res = SDValue(); if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) @@ -2915,7 +2913,6 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; - case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break; case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break; case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; @@ -3051,15 +3048,14 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, return; } - // Lower with SETCCE or SETCCCARRY if the target supports it. + // Lower with SETCCCARRY if the target supports it. EVT HiVT = LHSHi.getValueType(); EVT ExpandVT = TLI.getTypeToExpandTo(*DAG.getContext(), HiVT); bool HasSETCCCARRY = TLI.isOperationLegalOrCustom(ISD::SETCCCARRY, ExpandVT); // FIXME: Make all targets support this, then remove the other lowering. - if (HasSETCCCARRY || - TLI.getOperationAction(ISD::SETCCE, ExpandVT) == TargetLowering::Custom) { - // SETCCE/SETCCCARRY can detect < and >= directly. For > and <=, flip + if (HasSETCCCARRY) { + // SETCCCARRY can detect < and >= directly. For > and <=, flip // operands and condition code. bool FlipOperands = false; switch (CCCode) { @@ -3074,17 +3070,15 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, std::swap(LHSHi, RHSHi); } // Perform a wide subtraction, feeding the carry from the low part into - // SETCCE/SETCCCARRY. The SETCCE/SETCCCARRY operation is essentially - // looking at the high part of the result of LHS - RHS. It is negative - // iff LHS < RHS. It is zero or positive iff LHS >= RHS. + // SETCCCARRY. The SETCCCARRY operation is essentially looking at the high + // part of the result of LHS - RHS. It is negative iff LHS < RHS. It is + // zero or positive iff LHS >= RHS. EVT LoVT = LHSLo.getValueType(); - SDVTList VTList = DAG.getVTList( - LoVT, HasSETCCCARRY ? getSetCCResultType(LoVT) : MVT::Glue); - SDValue LowCmp = DAG.getNode(HasSETCCCARRY ? ISD::USUBO : ISD::SUBC, dl, - VTList, LHSLo, RHSLo); - SDValue Res = DAG.getNode(HasSETCCCARRY ? ISD::SETCCCARRY : ISD::SETCCE, dl, - getSetCCResultType(HiVT), LHSHi, RHSHi, - LowCmp.getValue(1), DAG.getCondCode(CCCode)); + SDVTList VTList = DAG.getVTList(LoVT, getSetCCResultType(LoVT)); + SDValue LowCmp = DAG.getNode(ISD::USUBO, dl, VTList, LHSLo, RHSLo); + SDValue Res = DAG.getNode(ISD::SETCCCARRY, dl, getSetCCResultType(HiVT), + LHSHi, RHSHi, LowCmp.getValue(1), + DAG.getCondCode(CCCode)); NewLHS = Res; NewRHS = SDValue(); return; @@ -3152,24 +3146,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { DAG.UpdateNodeOperands(N, NewLHS, NewRHS, DAG.getCondCode(CCCode)), 0); } -SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - SDValue Carry = N->getOperand(2); - SDValue Cond = N->getOperand(3); - SDLoc dl = SDLoc(N); - - SDValue LHSLo, LHSHi, RHSLo, RHSHi; - GetExpandedInteger(LHS, LHSLo, LHSHi); - GetExpandedInteger(RHS, RHSLo, RHSHi); - - // Expand to a SUBE for the low part and a smaller SETCCE for the high. 
- SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); - SDValue LowCmp = DAG.getNode(ISD::SUBE, dl, VTList, LHSLo, RHSLo, Carry); - return DAG.getNode(ISD::SETCCE, dl, N->getValueType(0), LHSHi, RHSHi, - LowCmp.getValue(1), Cond); -} - SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); @@ -3497,21 +3473,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { assert(NumElem * NumOperands == NumOutElem && "Unexpected number of elements"); - // If the input type is legal and we can promote it to a legal type with the - // same element size, go ahead do that to create a new concat. - if (getTypeAction(N->getOperand(0).getValueType()) == - TargetLowering::TypeLegal) { - EVT InPromotedTy = EVT::getVectorVT(*DAG.getContext(), OutElemTy, NumElem); - if (TLI.isTypeLegal(InPromotedTy)) { - SmallVector<SDValue, 8> Ops(NumOperands); - for (unsigned i = 0; i < NumOperands; ++i) { - Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, InPromotedTy, - N->getOperand(i)); - } - return DAG.getNode(ISD::CONCAT_VECTORS, dl, NOutVT, Ops); - } - } - // Take the elements from the first vector. SmallVector<SDValue, 8> Ops(NumOutElem); for (unsigned i = 0; i < NumOperands; ++i) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 4438ee7878b8..a9f144c06e9a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -84,9 +84,11 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { SDValue Res(&Node, i); EVT VT = Res.getValueType(); bool Failed = false; + // Don't create a value in map. + auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0; unsigned Mapped = 0; - if (ReplacedValues.find(Res) != ReplacedValues.end()) { + if (ResId && (ReplacedValues.find(ResId) != ReplacedValues.end())) { Mapped |= 1; // Check that remapped values are only used by nodes marked NewNode. for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end(); @@ -97,30 +99,32 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { // Check that the final result of applying ReplacedValues is not // marked NewNode. 
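In the SETCCCARRY lowering above, a double-width signed comparison is formed by subtracting the low halves (USUBO) and feeding the borrow into the high-half comparison. A standalone sketch of the same computation for 32-bit halves, written as plain C++ with a hypothetical helper name rather than SelectionDAG nodes:

  #include <cstdint>

  // Computes (LHSHi:LHSLo) < (RHSHi:RHSLo) as signed 64-bit values the way
  // USUBO + SETCCCARRY do: the high part of LHS - RHS, including the borrow
  // from the low subtraction, is negative iff LHS < RHS.
  bool wideSignedLessThan(uint32_t LHSLo, int32_t LHSHi,
                          uint32_t RHSLo, int32_t RHSHi) {
    bool Borrow = LHSLo < RHSLo;                        // carry-out of the low USUBO
    int64_t HiPart = (int64_t)LHSHi - RHSHi - (Borrow ? 1 : 0);
    return HiPart < 0;                                  // the SETLT case of SETCCCARRY
  }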
- SDValue NewVal = ReplacedValues[Res]; - DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal); + auto NewValId = ReplacedValues[ResId]; + auto I = ReplacedValues.find(NewValId); while (I != ReplacedValues.end()) { - NewVal = I->second; - I = ReplacedValues.find(NewVal); + NewValId = I->second; + I = ReplacedValues.find(NewValId); } + SDValue NewVal = getSDValue(NewValId); + (void)NewVal; assert(NewVal.getNode()->getNodeId() != NewNode && "ReplacedValues maps to a new node!"); } - if (PromotedIntegers.find(Res) != PromotedIntegers.end()) + if (ResId && PromotedIntegers.find(ResId) != PromotedIntegers.end()) Mapped |= 2; - if (SoftenedFloats.find(Res) != SoftenedFloats.end()) + if (ResId && SoftenedFloats.find(ResId) != SoftenedFloats.end()) Mapped |= 4; - if (ScalarizedVectors.find(Res) != ScalarizedVectors.end()) + if (ResId && ScalarizedVectors.find(ResId) != ScalarizedVectors.end()) Mapped |= 8; - if (ExpandedIntegers.find(Res) != ExpandedIntegers.end()) + if (ResId && ExpandedIntegers.find(ResId) != ExpandedIntegers.end()) Mapped |= 16; - if (ExpandedFloats.find(Res) != ExpandedFloats.end()) + if (ResId && ExpandedFloats.find(ResId) != ExpandedFloats.end()) Mapped |= 32; - if (SplitVectors.find(Res) != SplitVectors.end()) + if (ResId && SplitVectors.find(ResId) != SplitVectors.end()) Mapped |= 64; - if (WidenedVectors.find(Res) != WidenedVectors.end()) + if (ResId && WidenedVectors.find(ResId) != WidenedVectors.end()) Mapped |= 128; - if (PromotedFloats.find(Res) != PromotedFloats.end()) + if (ResId && PromotedFloats.find(ResId) != PromotedFloats.end()) Mapped |= 256; if (Node.getNodeId() != Processed) { @@ -224,9 +228,9 @@ bool DAGTypeLegalizer::run() { assert(N->getNodeId() == ReadyToProcess && "Node should be ready if on worklist!"); - DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG)); + LLVM_DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG)); if (IgnoreNodeResults(N)) { - DEBUG(dbgs() << "Ignoring node results\n"); + LLVM_DEBUG(dbgs() << "Ignoring node results\n"); goto ScanOperands; } @@ -234,11 +238,11 @@ bool DAGTypeLegalizer::run() { // types are illegal. 
for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { EVT ResultVT = N->getValueType(i); - DEBUG(dbgs() << "Analyzing result type: " << - ResultVT.getEVTString() << "\n"); + LLVM_DEBUG(dbgs() << "Analyzing result type: " << ResultVT.getEVTString() + << "\n"); switch (getTypeAction(ResultVT)) { case TargetLowering::TypeLegal: - DEBUG(dbgs() << "Legal result type\n"); + LLVM_DEBUG(dbgs() << "Legal result type\n"); break; // The following calls must take care of *all* of the node's results, // not just the illegal result they were passed (this includes results @@ -296,11 +300,11 @@ ScanOperands: continue; const auto Op = N->getOperand(i); - DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG)); + LLVM_DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG)); EVT OpVT = Op.getValueType(); switch (getTypeAction(OpVT)) { case TargetLowering::TypeLegal: - DEBUG(dbgs() << "Legal operand\n"); + LLVM_DEBUG(dbgs() << "Legal operand\n"); continue; // The following calls must either replace all of the node's results // using ReplaceValueWith, and return "false"; or update the node's @@ -370,7 +374,8 @@ ScanOperands: } if (i == NumOperands) { - DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); + dbgs() << "\n"); } } NodeDone: @@ -490,9 +495,6 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed) return N; - // Remove any stale map entries. - ExpungeNode(N); - // Okay, we know that this node is new. Recursively walk all of its operands // to see if they are new also. The depth of this walk is bounded by the size // of the new tree that was constructed (usually 2-3 nodes), so we don't worry @@ -543,7 +545,6 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { // to remap the operands, since they are the same as the operands we // remapped above. N = M; - ExpungeNode(N); } } @@ -564,100 +565,25 @@ void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) { RemapValue(Val); } -/// If N has a bogus mapping in ReplacedValues, eliminate it. -/// This can occur when a node is deleted then reallocated as a new node - -/// the mapping in ReplacedValues applies to the deleted node, not the new -/// one. -/// The only map that can have a deleted node as a source is ReplacedValues. -/// Other maps can have deleted nodes as targets, but since their looked-up -/// values are always immediately remapped using RemapValue, resulting in a -/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue -/// always performs correct mappings. In order to keep the mapping correct, -/// ExpungeNode should be called on any new nodes *before* adding them as -/// either source or target to ReplacedValues (which typically means calling -/// Expunge when a new node is first seen, since it may no longer be marked -/// NewNode by the time it is added to ReplacedValues). -void DAGTypeLegalizer::ExpungeNode(SDNode *N) { - if (N->getNodeId() != NewNode) - return; - - // If N is not remapped by ReplacedValues then there is nothing to do. - unsigned i, e; - for (i = 0, e = N->getNumValues(); i != e; ++i) - if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end()) - break; - - if (i == e) - return; - - // Remove N from all maps - this is expensive but rare. 
- - for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(), - E = PromotedIntegers.end(); I != E; ++I) { - assert(I->first.getNode() != N); - RemapValue(I->second); - } - - for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(), - E = SoftenedFloats.end(); I != E; ++I) { - assert(I->first.getNode() != N); - RemapValue(I->second); - } - - for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(), - E = ScalarizedVectors.end(); I != E; ++I) { - assert(I->first.getNode() != N); - RemapValue(I->second); - } - - for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(), - E = WidenedVectors.end(); I != E; ++I) { - assert(I->first.getNode() != N); - RemapValue(I->second); - } - - for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator - I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){ - assert(I->first.getNode() != N); - RemapValue(I->second.first); - RemapValue(I->second.second); - } - - for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator - I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) { - assert(I->first.getNode() != N); - RemapValue(I->second.first); - RemapValue(I->second.second); - } - - for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator - I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) { - assert(I->first.getNode() != N); - RemapValue(I->second.first); - RemapValue(I->second.second); - } - - for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(), - E = ReplacedValues.end(); I != E; ++I) - RemapValue(I->second); - - for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) - ReplacedValues.erase(SDValue(N, i)); -} - /// If the specified value was already legalized to another value, /// replace it by that value. -void DAGTypeLegalizer::RemapValue(SDValue &N) { - DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N); +void DAGTypeLegalizer::RemapValue(SDValue &V) { + auto Id = getTableId(V); + V = getSDValue(Id); +} + +void DAGTypeLegalizer::RemapId(TableId &Id) { + auto I = ReplacedValues.find(Id); if (I != ReplacedValues.end()) { + assert(Id != I->second && "Id is mapped to itself."); // Use path compression to speed up future lookups if values get multiply // replaced with other values. - RemapValue(I->second); - N = I->second; + RemapId(I->second); + Id = I->second; - // Note that it is possible to have N.getNode()->getNodeId() == NewNode at - // this point because it is possible for a node to be put in the map before - // being processed. + // Note that N = IdToValueMap[Id] it is possible to have + // N.getNode()->getNodeId() == NewNode at this point because it is possible + // for a node to be put in the map before being processed. } } @@ -714,19 +640,22 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { assert(From.getNode() != To.getNode() && "Potential legalization loop!"); // If expansion produced new nodes, make sure they are properly marked. - ExpungeNode(From.getNode()); - AnalyzeNewValue(To); // Expunges To. + AnalyzeNewValue(To); // Anything that used the old node should now use the new one. Note that this // can potentially cause recursive merging. SmallSetVector<SDNode*, 16> NodesToAnalyze; NodeUpdateListener NUL(*this, NodesToAnalyze); do { - DAG.ReplaceAllUsesOfValueWith(From, To); - // The old node may still be present in a map like ExpandedIntegers or - // PromotedIntegers. Inform maps about the replacement. 
- ReplacedValues[From] = To; + // The old node may be present in a map like ExpandedIntegers or + // PromotedIntegers. Inform maps about the replacement. + auto FromId = getTableId(From); + auto ToId = getTableId(To); + + if (FromId != ToId) + ReplacedValues[FromId] = ToId; + DAG.ReplaceAllUsesOfValueWith(From, To); // Process the list of nodes that need to be reanalyzed. while (!NodesToAnalyze.empty()) { @@ -751,12 +680,15 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { SDValue NewVal(M, i); if (M->getNodeId() == Processed) RemapValue(NewVal); - DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal); // OldVal may be a target of the ReplacedValues map which was marked // NewNode to force reanalysis because it was updated. Ensure that // anything that ReplacedValues mapped to OldVal will now be mapped // all the way to NewVal. - ReplacedValues[OldVal] = NewVal; + auto OldValId = getTableId(OldVal); + auto NewValId = getTableId(NewVal); + DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal); + if (OldValId != NewValId) + ReplacedValues[OldValId] = NewValId; } // The original node continues to exist in the DAG, marked NewNode. } @@ -773,9 +705,11 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { "Invalid type for promoted integer"); AnalyzeNewValue(Result); - SDValue &OpEntry = PromotedIntegers[Op]; - assert(!OpEntry.getNode() && "Node is already promoted!"); - OpEntry = Result; + auto &OpIdEntry = PromotedIntegers[getTableId(Op)]; + assert((OpIdEntry == 0) && "Node is already promoted!"); + OpIdEntry = getTableId(Result); + + DAG.transferDbgValues(Op, Result); } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { @@ -788,15 +722,15 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { "Invalid type for softened float"); AnalyzeNewValue(Result); - SDValue &OpEntry = SoftenedFloats[Op]; + auto &OpIdEntry = SoftenedFloats[getTableId(Op)]; // Allow repeated calls to save f128 type nodes // or any node with type that transforms to itself. // Many operations on these types are not softened. 
- assert((!OpEntry.getNode()|| + assert(((OpIdEntry == 0) || Op.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && "Node is already converted to integer!"); - OpEntry = Result; + OpIdEntry = getTableId(Result); } void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) { @@ -805,9 +739,9 @@ void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) { "Invalid type for promoted float"); AnalyzeNewValue(Result); - SDValue &OpEntry = PromotedFloats[Op]; - assert(!OpEntry.getNode() && "Node is already promoted!"); - OpEntry = Result; + auto &OpIdEntry = PromotedFloats[getTableId(Op)]; + assert((OpIdEntry == 0) && "Node is already promoted!"); + OpIdEntry = getTableId(Result); } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { @@ -818,19 +752,17 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { "Invalid type for scalarized vector"); AnalyzeNewValue(Result); - SDValue &OpEntry = ScalarizedVectors[Op]; - assert(!OpEntry.getNode() && "Node is already scalarized!"); - OpEntry = Result; + auto &OpIdEntry = ScalarizedVectors[getTableId(Op)]; + assert((OpIdEntry == 0) && "Node is already scalarized!"); + OpIdEntry = getTableId(Result); } void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi) { - std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; - RemapValue(Entry.first); - RemapValue(Entry.second); - assert(Entry.first.getNode() && "Operand isn't expanded"); - Lo = Entry.first; - Hi = Entry.second; + std::pair<TableId, TableId> &Entry = ExpandedIntegers[getTableId(Op)]; + assert((Entry.first != 0) && "Operand isn't expanded"); + Lo = getSDValue(Entry.first); + Hi = getSDValue(Entry.second); } void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, @@ -856,20 +788,18 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, } // Remember that this is the result of the node. - std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; - assert(!Entry.first.getNode() && "Node already expanded"); - Entry.first = Lo; - Entry.second = Hi; + std::pair<TableId, TableId> &Entry = ExpandedIntegers[getTableId(Op)]; + assert((Entry.first == 0) && "Node already expanded"); + Entry.first = getTableId(Lo); + Entry.second = getTableId(Hi); } void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi) { - std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; - RemapValue(Entry.first); - RemapValue(Entry.second); - assert(Entry.first.getNode() && "Operand isn't expanded"); - Lo = Entry.first; - Hi = Entry.second; + std::pair<TableId, TableId> &Entry = ExpandedFloats[getTableId(Op)]; + assert((Entry.first != 0) && "Operand isn't expanded"); + Lo = getSDValue(Entry.first); + Hi = getSDValue(Entry.second); } void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, @@ -882,21 +812,19 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, AnalyzeNewValue(Lo); AnalyzeNewValue(Hi); - // Remember that this is the result of the node. 
- std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; - assert(!Entry.first.getNode() && "Node already expanded"); - Entry.first = Lo; - Entry.second = Hi; + std::pair<TableId, TableId> &Entry = ExpandedFloats[getTableId(Op)]; + assert((Entry.first == 0) && "Node already expanded"); + Entry.first = getTableId(Lo); + Entry.second = getTableId(Hi); } void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi) { - std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; - RemapValue(Entry.first); - RemapValue(Entry.second); - assert(Entry.first.getNode() && "Operand isn't split"); - Lo = Entry.first; - Hi = Entry.second; + std::pair<TableId, TableId> &Entry = SplitVectors[getTableId(Op)]; + Lo = getSDValue(Entry.first); + Hi = getSDValue(Entry.second); + assert(Lo.getNode() && "Operand isn't split"); + ; } void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, @@ -912,10 +840,10 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, AnalyzeNewValue(Hi); // Remember that this is the result of the node. - std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; - assert(!Entry.first.getNode() && "Node already split"); - Entry.first = Lo; - Entry.second = Hi; + std::pair<TableId, TableId> &Entry = SplitVectors[getTableId(Op)]; + assert((Entry.first == 0) && "Node already split"); + Entry.first = getTableId(Lo); + Entry.second = getTableId(Hi); } void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { @@ -924,9 +852,9 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { "Invalid type for widened vector"); AnalyzeNewValue(Result); - SDValue &OpEntry = WidenedVectors[Op]; - assert(!OpEntry.getNode() && "Node already widened!"); - OpEntry = Result; + auto &OpIdEntry = WidenedVectors[getTableId(Op)]; + assert((OpIdEntry == 0) && "Node already widened!"); + OpIdEntry = getTableId(Result); } @@ -1064,11 +992,11 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits() + HVT.getSizeInBits()); + EVT ShiftAmtVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout(), false); Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi, - DAG.getConstant(LVT.getSizeInBits(), dlHi, - TLI.getPointerTy(DAG.getDataLayout()))); + DAG.getConstant(LVT.getSizeInBits(), dlHi, ShiftAmtVT)); return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 64cb80e0d853..2c6b1ee7900f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -93,46 +93,81 @@ private: N->getOpcode() == ISD::Register; } + // Bijection from SDValue to unique id. As each created node gets a + // new id we do not need to worry about reuse expunging. Should we + // run out of ids, we can do a one time expensive compactifcation. + typedef unsigned TableId; + + TableId NextValueId = 1; + + SmallDenseMap<SDValue, TableId, 8> ValueToIdMap; + SmallDenseMap<TableId, SDValue, 8> IdToValueMap; + /// For integer nodes that are below legal width, this map indicates what /// promoted value to use. - SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers; + SmallDenseMap<TableId, TableId, 8> PromotedIntegers; /// For integer nodes that need to be expanded this map indicates which /// operands are the expanded version of the input. 
- SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers; + SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedIntegers; /// For floating-point nodes converted to integers of the same size, this map /// indicates the converted value to use. - SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats; + SmallDenseMap<TableId, TableId, 8> SoftenedFloats; /// For floating-point nodes that have a smaller precision than the smallest /// supported precision, this map indicates what promoted value to use. - SmallDenseMap<SDValue, SDValue, 8> PromotedFloats; + SmallDenseMap<TableId, TableId, 8> PromotedFloats; /// For float nodes that need to be expanded this map indicates which operands /// are the expanded version of the input. - SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats; + SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedFloats; /// For nodes that are <1 x ty>, this map indicates the scalar value of type /// 'ty' to use. - SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors; + SmallDenseMap<TableId, TableId, 8> ScalarizedVectors; /// For nodes that need to be split this map indicates which operands are the /// expanded version of the input. - SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors; + SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> SplitVectors; /// For vector nodes that need to be widened, indicates the widened value to /// use. - SmallDenseMap<SDValue, SDValue, 8> WidenedVectors; + SmallDenseMap<TableId, TableId, 8> WidenedVectors; /// For values that have been replaced with another, indicates the replacement /// value to use. - SmallDenseMap<SDValue, SDValue, 8> ReplacedValues; + SmallDenseMap<TableId, TableId, 8> ReplacedValues; /// This defines a worklist of nodes to process. In order to be pushed onto /// this worklist, all operands of a node must have already been processed. SmallVector<SDNode*, 128> Worklist; + TableId getTableId(SDValue V) { + assert(V.getNode() && "Getting TableId on SDValue()"); + + auto I = ValueToIdMap.find(V); + if (I != ValueToIdMap.end()) { + // replace if there's been a shift. + RemapId(I->second); + assert(I->second && "All Ids should be nonzero"); + return I->second; + } + // Add if it's not there. + ValueToIdMap.insert(std::make_pair(V, NextValueId)); + IdToValueMap.insert(std::make_pair(NextValueId, V)); + ++NextValueId; + assert(NextValueId != 0 && + "Ran out of Ids. Increase id type size or add compactification"); + return NextValueId - 1; + } + + const SDValue &getSDValue(TableId &Id) { + RemapId(Id); + assert(Id && "TableId should be non-zero"); + return IdToValueMap[Id]; + } + public: explicit DAGTypeLegalizer(SelectionDAG &dag) : TLI(dag.getTargetLoweringInfo()), DAG(dag), @@ -147,10 +182,25 @@ public: bool run(); void NoteDeletion(SDNode *Old, SDNode *New) { - ExpungeNode(Old); - ExpungeNode(New); - for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) - ReplacedValues[SDValue(Old, i)] = SDValue(New, i); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { + TableId NewId = getTableId(SDValue(New, i)); + TableId OldId = getTableId(SDValue(Old, i)); + + if (OldId != NewId) + ReplacedValues[OldId] = NewId; + + // Delete Node from tables. 
+ ValueToIdMap.erase(SDValue(Old, i)); + IdToValueMap.erase(OldId); + PromotedIntegers.erase(OldId); + ExpandedIntegers.erase(OldId); + SoftenedFloats.erase(OldId); + PromotedFloats.erase(OldId); + ExpandedFloats.erase(OldId); + ScalarizedVectors.erase(OldId); + SplitVectors.erase(OldId); + WidenedVectors.erase(OldId); + } } SelectionDAG &getDAG() const { return DAG; } @@ -158,9 +208,9 @@ public: private: SDNode *AnalyzeNewNode(SDNode *N); void AnalyzeNewValue(SDValue &Val); - void ExpungeNode(SDNode *N); void PerformExpensiveChecks(); - void RemapValue(SDValue &N); + void RemapId(TableId &Id); + void RemapValue(SDValue &V); // Common routines. SDValue BitConvertToInteger(SDValue Op); @@ -207,8 +257,8 @@ private: /// returns an i32, the lower 16 bits of which coincide with Op, and the upper /// 16 bits of which contain rubbish. SDValue GetPromotedInteger(SDValue Op) { - SDValue &PromotedOp = PromotedIntegers[Op]; - RemapValue(PromotedOp); + TableId &PromotedId = PromotedIntegers[getTableId(Op)]; + SDValue PromotedOp = getSDValue(PromotedId); assert(PromotedOp.getNode() && "Operand wasn't promoted?"); return PromotedOp; } @@ -282,7 +332,7 @@ private: SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); // Integer Operand Promotion. - bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo); + bool PromoteIntegerOperand(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_ANY_EXTEND(SDNode *N); SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N); SDValue PromoteIntOp_BITCAST(SDNode *N); @@ -373,11 +423,10 @@ private: bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); // Integer Operand Expansion. - bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); + bool ExpandIntegerOperand(SDNode *N, unsigned OpNo); SDValue ExpandIntOp_BR_CC(SDNode *N); SDValue ExpandIntOp_SELECT_CC(SDNode *N); SDValue ExpandIntOp_SETCC(SDNode *N); - SDValue ExpandIntOp_SETCCE(SDNode *N); SDValue ExpandIntOp_SETCCCARRY(SDNode *N); SDValue ExpandIntOp_Shift(SDNode *N); SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); @@ -403,16 +452,15 @@ private: /// stay in a register, the Op is not converted to an integer. /// In that case, the given op is returned. SDValue GetSoftenedFloat(SDValue Op) { - auto Iter = SoftenedFloats.find(Op); + TableId Id = getTableId(Op); + auto Iter = SoftenedFloats.find(Id); if (Iter == SoftenedFloats.end()) { assert(isSimpleLegalType(Op.getValueType()) && "Operand wasn't converted to integer?"); return Op; } - - SDValue &SoftenedOp = Iter->second; + SDValue SoftenedOp = getSDValue(Iter->second); assert(SoftenedOp.getNode() && "Unconverted op in SoftenedFloats?"); - RemapValue(SoftenedOp); return SoftenedOp; } void SetSoftenedFloat(SDValue Op, SDValue Result); @@ -531,7 +579,7 @@ private: void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi); // Float Operand Expansion. 
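The TableId scheme introduced above keys the per-kind maps by small integer ids, so a replacement is recorded once in ReplacedValues instead of rewriting every map (the job the deleted ExpungeNode used to do). A minimal sketch of that indirection with the same path compression RemapId applies; the type and names here are generic stand-ins, not the LLVM declarations:

  #include <unordered_map>

  struct IdRemapper {
    std::unordered_map<unsigned, unsigned> Replaced;   // old id -> new id

    // Follow replacement chains and compress them, as RemapId does, so
    // repeated lookups of a multiply-replaced id stay cheap.
    unsigned remap(unsigned Id) {
      auto I = Replaced.find(Id);
      if (I == Replaced.end())
        return Id;
      I->second = remap(I->second);                    // path compression
      return I->second;
    }
  };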
- bool ExpandFloatOperand(SDNode *N, unsigned OperandNo); + bool ExpandFloatOperand(SDNode *N, unsigned OpNo); SDValue ExpandFloatOp_BR_CC(SDNode *N); SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N); SDValue ExpandFloatOp_FP_ROUND(SDNode *N); @@ -549,8 +597,8 @@ private: //===--------------------------------------------------------------------===// SDValue GetPromotedFloat(SDValue Op) { - SDValue &PromotedOp = PromotedFloats[Op]; - RemapValue(PromotedOp); + TableId &PromotedId = PromotedFloats[getTableId(Op)]; + SDValue PromotedOp = getSDValue(PromotedId); assert(PromotedOp.getNode() && "Operand wasn't promoted?"); return PromotedOp; } @@ -572,7 +620,7 @@ private: SDValue PromoteFloatRes_UNDEF(SDNode *N); SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N); - bool PromoteFloatOperand(SDNode *N, unsigned ResNo); + bool PromoteFloatOperand(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo); @@ -589,15 +637,15 @@ private: /// element type, this returns the element. For example, if Op is a v1i32, /// Op = < i32 val >, this method returns val, an i32. SDValue GetScalarizedVector(SDValue Op) { - SDValue &ScalarizedOp = ScalarizedVectors[Op]; - RemapValue(ScalarizedOp); + TableId &ScalarizedId = ScalarizedVectors[getTableId(Op)]; + SDValue ScalarizedOp = getSDValue(ScalarizedId); assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?"); return ScalarizedOp; } void SetScalarizedVector(SDValue Op, SDValue Result); // Vector Result Scalarization: <1 x ty> -> ty. - void ScalarizeVectorResult(SDNode *N, unsigned OpNo); + void ScalarizeVectorResult(SDNode *N, unsigned ResNo); SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo); SDValue ScalarizeVecRes_BinOp(SDNode *N); SDValue ScalarizeVecRes_TernaryOp(SDNode *N); @@ -646,13 +694,14 @@ private: void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi); // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. 
- void SplitVectorResult(SDNode *N, unsigned OpNo); + void SplitVectorResult(SDNode *N, unsigned ResNo); void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -662,9 +711,9 @@ private: void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); + void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi); + void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi); void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, @@ -684,7 +733,7 @@ private: SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); - SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); + SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); @@ -701,8 +750,8 @@ private: /// method returns a v4i32 for which the first two elements are the same as /// those of Op, while the last two elements contain rubbish. SDValue GetWidenedVector(SDValue Op) { - SDValue &WidenedOp = WidenedVectors[Op]; - RemapValue(WidenedOp); + TableId &WidenedId = WidenedVectors[getTableId(Op)]; + SDValue WidenedOp = getSDValue(WidenedId); assert(WidenedOp.getNode() && "Operand wasn't widened?"); return WidenedOp; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 993465ae9dc2..df3134828af5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -300,6 +300,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align); Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); + Chain = Hi.getValue(1); // Handle endianness of the load. if (TLI.hasBigEndianPartOrdering(OVT, DAG.getDataLayout())) @@ -307,7 +308,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { // Modified the chain - switch anything that used the old chain to use // the new one. 
- ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); + ReplaceValueWith(SDValue(N, 1), Chain); } @@ -384,7 +385,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { // Build a vector of twice the length out of the expanded elements. // For example <3 x i64> -> <6 x i32>. - std::vector<SDValue> NewElts; + SmallVector<SDValue, 16> NewElts; NewElts.reserve(NumElts*2); for (unsigned i = 0; i < NumElts; ++i) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 6a141818bb6d..67928d4bdbd5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -32,7 +32,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" @@ -41,6 +40,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include <cassert> #include <cstdint> @@ -63,7 +63,7 @@ class VectorLegalizer { /// legalizing the same thing more than once. SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; - /// \brief Adds a node to the translation cache. + /// Adds a node to the translation cache. void AddLegalizedOperand(SDValue From, SDValue To) { LegalizedNodes.insert(std::make_pair(From, To)); // If someone requests legalization of the new node, return itself. @@ -71,55 +71,55 @@ class VectorLegalizer { LegalizedNodes.insert(std::make_pair(To, To)); } - /// \brief Legalizes the given node. + /// Legalizes the given node. SDValue LegalizeOp(SDValue Op); - /// \brief Assuming the node is legal, "legalize" the results. + /// Assuming the node is legal, "legalize" the results. SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); - /// \brief Implements unrolling a VSETCC. + /// Implements unrolling a VSETCC. SDValue UnrollVSETCC(SDValue Op); - /// \brief Implement expand-based legalization of vector operations. + /// Implement expand-based legalization of vector operations. /// /// This is just a high-level routine to dispatch to specific code paths for /// operations to legalize them. SDValue Expand(SDValue Op); - /// \brief Implements expansion for FNEG; falls back to UnrollVectorOp if + /// Implements expansion for FNEG; falls back to UnrollVectorOp if /// FSUB isn't legal. /// /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if /// SINT_TO_FLOAT and SHR on vectors isn't legal. SDValue ExpandUINT_TO_FLOAT(SDValue Op); - /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. + /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. SDValue ExpandSEXTINREG(SDValue Op); - /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG. + /// Implement expansion for ANY_EXTEND_VECTOR_INREG. /// /// Shuffles the low lanes of the operand into place and bitcasts to the proper /// type. The contents of the bits in the extended part of each element are /// undef. SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op); - /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG. + /// Implement expansion for SIGN_EXTEND_VECTOR_INREG. 
/// /// Shuffles the low lanes of the operand into place, bitcasts to the proper /// type, then shifts left and arithmetic shifts right to introduce a sign /// extension. SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op); - /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG. + /// Implement expansion for ZERO_EXTEND_VECTOR_INREG. /// /// Shuffles the low lanes of the operand into place and blends zeros into /// the remaining lanes, finally bitcasting to the proper type. SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op); - /// \brief Expand bswap of vectors into a shuffle if legal. + /// Expand bswap of vectors into a shuffle if legal. SDValue ExpandBSWAP(SDValue Op); - /// \brief Implement vselect in terms of XOR, AND, OR when blend is not + /// Implement vselect in terms of XOR, AND, OR when blend is not /// supported by the target. SDValue ExpandVSELECT(SDValue Op); SDValue ExpandSELECT(SDValue Op); @@ -130,19 +130,20 @@ class VectorLegalizer { SDValue ExpandBITREVERSE(SDValue Op); SDValue ExpandCTLZ(SDValue Op); SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op); - - /// \brief Implements vector promotion. + SDValue ExpandStrictFPOp(SDValue Op); + + /// Implements vector promotion. /// /// This is essentially just bitcasting the operands to a different type and /// bitcasting the result back to the original type. SDValue Promote(SDValue Op); - /// \brief Implements [SU]INT_TO_FP vector promotion. + /// Implements [SU]INT_TO_FP vector promotion. /// /// This is a [zs]ext of the input operand to a larger integer type. SDValue PromoteINT_TO_FP(SDValue Op); - /// \brief Implements FP_TO_[SU]INT vector promotion of the result type. + /// Implements FP_TO_[SU]INT vector promotion of the result type. /// /// It is promoted to a larger integer type. The result is then /// truncated back to the original type. @@ -152,7 +153,7 @@ public: VectorLegalizer(SelectionDAG& dag) : DAG(dag), TLI(dag.getTargetLoweringInfo()) {} - /// \brief Begin legalizer the vector operations in the DAG. + /// Begin legalizer the vector operations in the DAG. 
bool Run(); }; @@ -222,14 +223,16 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { for (const SDValue &Op : Node->op_values()) Ops.push_back(LegalizeOp(Op)); - SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0); + SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), + Op.getResNo()); bool HasVectorValue = false; if (Op.getOpcode() == ISD::LOAD) { LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); ISD::LoadExtType ExtType = LD->getExtensionType(); if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { - DEBUG(dbgs() << "\nLegalizing extending vector load: "; Node->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: "; + Node->dump(&DAG)); switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0), LD->getMemoryVT())) { default: llvm_unreachable("This action is not supported yet!"); @@ -261,8 +264,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { EVT StVT = ST->getMemoryVT(); MVT ValVT = ST->getValue().getSimpleValueType(); if (StVT.isVector() && ST->isTruncatingStore()) { - DEBUG(dbgs() << "\nLegalizing truncating vector store: "; - Node->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: "; + Node->dump(&DAG)); switch (TLI.getTruncStoreAction(ValVT, StVT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: @@ -287,10 +290,34 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (!HasVectorValue) return TranslateLegalizeResults(Op, Result); - EVT QueryType; + TargetLowering::LegalizeAction Action = TargetLowering::Legal; switch (Op.getOpcode()) { default: return TranslateLegalizeResults(Op, Result); + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FSQRT: + case ISD::STRICT_FMA: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + // These pseudo-ops get legalized as if they were their non-strict + // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT + // is also legal, but if ISD::FSQRT requires expansion then so does + // ISD::STRICT_FSQRT. 
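The comment above relies on each STRICT_* node taking the legalization action of its non-strict counterpart; getStrictFPOperationAction effectively rewrites the opcode before querying the usual operation action. An illustrative subset of that mapping (a sketch with a hypothetical helper name, not the full TargetLowering switch):

  #include "llvm/CodeGen/ISDOpcodes.h"

  // Map a strict FP opcode to the non-strict opcode whose action is used.
  static unsigned equivalentNonStrictOpcode(unsigned Opcode) {
    switch (Opcode) {
    case llvm::ISD::STRICT_FADD:  return llvm::ISD::FADD;
    case llvm::ISD::STRICT_FSUB:  return llvm::ISD::FSUB;
    case llvm::ISD::STRICT_FMUL:  return llvm::ISD::FMUL;
    case llvm::ISD::STRICT_FDIV:  return llvm::ISD::FDIV;
    case llvm::ISD::STRICT_FSQRT: return llvm::ISD::FSQRT;
    default:                      return Opcode;       // already non-strict
    }
  }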
+ Action = TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)); + break; case ISD::ADD: case ISD::SUB: case ISD::MUL: @@ -366,42 +393,47 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::UMAX: case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: - QueryType = Node->getValueType(0); + case ISD::FCANONICALIZE: + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; case ISD::FP_ROUND_INREG: - QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT(); + Action = TLI.getOperationAction(Node->getOpcode(), + cast<VTSDNode>(Node->getOperand(1))->getVT()); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: - QueryType = Node->getOperand(0).getValueType(); + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(0).getValueType()); break; case ISD::MSCATTER: - QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType(); + Action = TLI.getOperationAction(Node->getOpcode(), + cast<MaskedScatterSDNode>(Node)->getValue().getValueType()); break; case ISD::MSTORE: - QueryType = cast<MaskedStoreSDNode>(Node)->getValue().getValueType(); + Action = TLI.getOperationAction(Node->getOpcode(), + cast<MaskedStoreSDNode>(Node)->getValue().getValueType()); break; } - DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); - switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { + switch (Action) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Promote: Result = Promote(Op); Changed = true; break; case TargetLowering::Legal: - DEBUG(dbgs() << "Legal node: nothing to do\n"); + LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n"); break; case TargetLowering::Custom: { - DEBUG(dbgs() << "Trying custom legalization\n"); + LLVM_DEBUG(dbgs() << "Trying custom legalization\n"); if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) { - DEBUG(dbgs() << "Successfully custom legalized node\n"); + LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); Result = Tmp1; break; } - DEBUG(dbgs() << "Could not custom legalize node\n"); + LLVM_DEBUG(dbgs() << "Could not custom legalize node\n"); LLVM_FALLTHROUGH; } case TargetLowering::Expand: @@ -649,9 +681,14 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals); } else { SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG); - - NewChain = Scalarized.getValue(1); - Value = Scalarized.getValue(0); + // Skip past MERGE_VALUE node if known. + if (Scalarized->getOpcode() == ISD::MERGE_VALUES) { + NewChain = Scalarized.getOperand(1); + Value = Scalarized.getOperand(0); + } else { + NewChain = Scalarized.getValue(1); + Value = Scalarized.getValue(0); + } } AddLegalizedOperand(Op.getValue(0), Value); @@ -662,35 +699,6 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { SDValue VectorLegalizer::ExpandStore(SDValue Op) { StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); - - EVT StVT = ST->getMemoryVT(); - EVT MemSclVT = StVT.getScalarType(); - unsigned ScalarSize = MemSclVT.getSizeInBits(); - - // Round odd types to the next pow of two. - if (!isPowerOf2_32(ScalarSize)) { - // FIXME: This is completely broken and inconsistent with ExpandLoad - // handling. - - // For sub-byte element sizes, this ends up with 0 stride between elements, - // so the same element just gets re-written to the same location. There seem - // to be tests explicitly testing for this broken behavior though. tests - // for this broken behavior. 
- - LLVMContext &Ctx = *DAG.getContext(); - - EVT NewMemVT - = EVT::getVectorVT(Ctx, - MemSclVT.getIntegerVT(Ctx, NextPowerOf2(ScalarSize)), - StVT.getVectorNumElements()); - - SDValue NewVectorStore = DAG.getTruncStore( - ST->getChain(), SDLoc(Op), ST->getValue(), ST->getBasePtr(), - ST->getPointerInfo(), NewMemVT, ST->getAlignment(), - ST->getMemOperand()->getFlags(), ST->getAAInfo()); - ST = cast<StoreSDNode>(NewVectorStore.getNode()); - } - SDValue TF = TLI.scalarizeVectorStore(ST, DAG); AddLegalizedOperand(Op, TF); return TF; @@ -727,6 +735,24 @@ SDValue VectorLegalizer::Expand(SDValue Op) { return ExpandCTLZ(Op); case ISD::CTTZ_ZERO_UNDEF: return ExpandCTTZ_ZERO_UNDEF(Op); + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FSQRT: + case ISD::STRICT_FMA: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + return ExpandStrictFPOp(Op); default: return DAG.UnrollVectorOp(Op.getNode()); } @@ -1020,7 +1046,7 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); // Two to the power of half-word-size. - SDValue TWOHW = DAG.getConstantFP(1 << (BW / 2), DL, Op.getValueType()); + SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType()); // Clear upper part of LO, lower HI SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); @@ -1113,6 +1139,53 @@ SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) { return DAG.UnrollVectorOp(Op.getNode()); } +SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { + EVT VT = Op.getValueType(); + EVT EltVT = VT.getVectorElementType(); + unsigned NumElems = VT.getVectorNumElements(); + unsigned NumOpers = Op.getNumOperands(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT ValueVTs[] = {EltVT, MVT::Other}; + SDValue Chain = Op.getOperand(0); + SDLoc dl(Op); + + SmallVector<SDValue, 32> OpValues; + SmallVector<SDValue, 32> OpChains; + for (unsigned i = 0; i < NumElems; ++i) { + SmallVector<SDValue, 4> Opers; + SDValue Idx = DAG.getConstant(i, dl, + TLI.getVectorIdxTy(DAG.getDataLayout())); + + // The Chain is the first operand. + Opers.push_back(Chain); + + // Now process the remaining operands. 
+ for (unsigned j = 1; j < NumOpers; ++j) { + SDValue Oper = Op.getOperand(j); + EVT OperVT = Oper.getValueType(); + + if (OperVT.isVector()) + Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + EltVT, Oper, Idx); + + Opers.push_back(Oper); + } + + SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers); + + OpValues.push_back(ScalarOp.getValue(0)); + OpChains.push_back(ScalarOp.getValue(1)); + } + + SDValue Result = DAG.getBuildVector(VT, dl, OpValues); + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains); + + AddLegalizedOperand(Op.getValue(0), Result); + AddLegalizedOperand(Op.getValue(1), NewChain); + + return NewChain; +} + SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { EVT VT = Op.getValueType(); unsigned NumElems = VT.getVectorNumElements(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index df1cbeb92740..1cd43ace48f3 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -33,9 +33,8 @@ using namespace llvm; //===----------------------------------------------------------------------===// void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; - N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; N->dump(&DAG); + dbgs() << "\n"); SDValue R = SDValue(); switch (N->getOpcode()) { @@ -169,9 +168,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, } SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { + SDValue Op = N->getOperand(0); + if (Op.getValueType().isVector() + && Op.getValueType().getVectorNumElements() == 1 + && !isSimpleLegalType(Op.getValueType())) + Op = GetScalarizedVector(Op); EVT NewVT = N->getValueType(0).getVectorElementType(); return DAG.getNode(ISD::BITCAST, SDLoc(N), - NewVT, N->getOperand(0)); + NewVT, Op); } SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { @@ -338,8 +342,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { ScalarBool = TargetLowering::UndefinedBooleanContent; } + EVT CondVT = Cond.getValueType(); if (ScalarBool != VecBool) { - EVT CondVT = Cond.getValueType(); switch (ScalarBool) { case TargetLowering::UndefinedBooleanContent: break; @@ -360,6 +364,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { } } + // Truncate the condition if needed + auto BoolVT = getSetCCResultType(CondVT); + if (BoolVT.bitsLT(CondVT)) + Cond = DAG.getNode(ISD::TRUNCATE, SDLoc(N), BoolVT, Cond); + return DAG.getSelect(SDLoc(N), LHS.getValueType(), Cond, LHS, GetScalarizedVector(N->getOperand(2))); @@ -433,9 +442,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { - DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; - N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG); + dbgs() << "\n"); SDValue Res = SDValue(); if (!Res.getNode()) { @@ -515,7 +523,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { N->getValueType(0).getScalarType(), Elt); // Revectorize the result so the types line up with what the uses of this // expression expect. 
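ExpandStrictFPOp above unrolls a strict vector FP operation into one chained scalar operation per element and then token-factors the per-element chains, so anything ordered after the original vector op stays ordered after every scalar op and its FP exceptions. A conceptual view of the result for a <4 x float> STRICT_FSQRT, as a plain C++ sketch rather than DAG nodes:

  #include <cmath>

  // Each element is extracted, the scalar strict op is applied, and the
  // results are rebuilt into a vector; the loop stands in for the
  // EXTRACT_VECTOR_ELT / scalar op / BUILD_VECTOR sequence, and the merged
  // chain corresponds to the TokenFactor of the four scalar chains.
  void strictSqrtV4(const float In[4], float Out[4]) {
    for (int i = 0; i < 4; ++i)
      Out[i] = std::sqrt(In[i]);
  }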
- return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Op); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op); } /// The vectors to concatenate have length one - use a BUILD_VECTOR instead. @@ -618,9 +626,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { /// invalid operands or may have other results that need legalization, we just /// know that (at least) one result needs vector splitting. void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Split node result: "; - N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG); dbgs() << "\n"); SDValue Lo, Hi; // See if the target wants to custom expand this node. @@ -749,6 +755,25 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMA: SplitVecRes_TernaryOp(N, Lo, Hi); break; + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FSQRT: + case ISD::STRICT_FMA: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + SplitVecRes_StrictFPOp(N, Lo, Hi); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -1028,6 +1053,56 @@ void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi); } +void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + unsigned NumOps = N->getNumOperands(); + SDValue Chain = N->getOperand(0); + EVT LoVT, HiVT; + SDLoc dl(N); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + SmallVector<SDValue, 4> OpsLo; + SmallVector<SDValue, 4> OpsHi; + + // The Chain is the first operand. + OpsLo.push_back(Chain); + OpsHi.push_back(Chain); + + // Now process the remaining operands. + for (unsigned i = 1; i < NumOps; ++i) { + SDValue Op = N->getOperand(i); + SDValue OpLo = Op; + SDValue OpHi = Op; + + EVT InVT = Op.getValueType(); + if (InVT.isVector()) { + // If the input also splits, handle it directly for a + // compile time speedup. Otherwise split it by hand. + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) + GetSplitVector(Op, OpLo, OpHi); + else + std::tie(OpLo, OpHi) = DAG.SplitVectorOperand(N, i); + } + + OpsLo.push_back(OpLo); + OpsHi.push_back(OpHi); + } + + EVT LoValueVTs[] = {LoVT, MVT::Other}; + EVT HiValueVTs[] = {HiVT, MVT::Other}; + Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo); + Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi); + + // Build a factor node to remember that this Op is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Lo.getValue(1), Hi.getValue(1)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Chain); +} + void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); @@ -1200,16 +1275,16 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, MLD->isExpandingLoad()); + unsigned HiOffset = LoMemVT.getStoreSize(); - MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(MLD->getPointerInfo(), - MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), - SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); + MMO = DAG.getMachineFunction().getMachineMemOperand( + MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad, + HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), + MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, ExtType, MLD->isExpandingLoad()); - // Build a factor node to remember that this load is independent of the // other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), @@ -1232,6 +1307,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue Mask = MGT->getMask(); SDValue Src0 = MGT->getValue(); SDValue Index = MGT->getIndex(); + SDValue Scale = MGT->getScale(); unsigned Alignment = MGT->getOriginalAlignment(); // Split Mask operand @@ -1263,11 +1339,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); - SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo}; + SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, MMO); - SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi}; + SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale}; Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, MMO); @@ -1365,8 +1441,8 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { - DEBUG(dbgs() << "Split vector extend via incremental extend:"; - N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Split vector extend via incremental extend:"; + N->dump(&DAG); dbgs() << "\n"); // Extend the source vector by one step. SDValue NewSrc = DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); @@ -1501,9 +1577,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, /// the node are known to be legal, but other operands of the node may need /// legalization as well as the specified one. bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { - DEBUG(dbgs() << "Split node operand: "; - N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); // See if the target wants to custom split this node. @@ -1683,8 +1757,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) { // Use the appropriate scalar instruction on the split subvectors before // reducing the now partially reduced smaller vector. 
- SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi); - return DAG.getNode(N->getOpcode(), dl, ResVT, Partial); + SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi, N->getFlags()); + return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, N->getFlags()); } SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { @@ -1810,6 +1884,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue Ch = MGT->getChain(); SDValue Ptr = MGT->getBasePtr(); SDValue Index = MGT->getIndex(); + SDValue Scale = MGT->getScale(); SDValue Mask = MGT->getMask(); SDValue Src0 = MGT->getValue(); unsigned Alignment = MGT->getOriginalAlignment(); @@ -1842,7 +1917,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); - SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo}; + SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale}; SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, MMO); @@ -1852,7 +1927,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, Alignment, MGT->getAAInfo(), MGT->getRanges()); - SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi}; + SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale}; SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, MMO); @@ -1916,10 +1991,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, N->isCompressingStore()); - MMO = DAG.getMachineFunction(). - getMachineMemOperand(N->getPointerInfo(), - MachineMemOperand::MOStore, HiMemVT.getStoreSize(), - SecondHalfAlignment, N->getAAInfo(), N->getRanges()); + unsigned HiOffset = LoMemVT.getStoreSize(); + + MMO = DAG.getMachineFunction().getMachineMemOperand( + N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore, + HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(), + N->getRanges()); Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, N->isTruncatingStore(), N->isCompressingStore()); @@ -1935,6 +2012,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, SDValue Ptr = N->getBasePtr(); SDValue Mask = N->getMask(); SDValue Index = N->getIndex(); + SDValue Scale = N->getScale(); SDValue Data = N->getValue(); EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); @@ -1970,7 +2048,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo}; + SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), DL, OpsLo, MMO); @@ -1982,7 +2060,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, // The order of the Scatter operation after split is well defined. The "Hi" // part comes after the "Lo". So these two operations should be chained one // after another. 
- SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi}; + SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), DL, OpsHi, MMO); } @@ -2005,6 +2083,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + // Scalarize if the split halves are not byte-sized. + if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) + return TLI.scalarizeVectorStore(N, DAG); + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; if (isTruncating) @@ -2089,9 +2171,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { return SplitVecOp_UnaryOp(N); SDLoc DL(N); - // Extract the halves of the input via extract_subvector. + // Get the split input vector. SDValue InLoVec, InHiVec; - std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); + GetSplitVector(InVec, InLoVec, InHiVec); // Truncate them to 1/2 the element size. EVT HalfElementVT = IsFloat ? EVT::getFloatingPointVT(InElementSize/2) : @@ -2164,9 +2246,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) { //===----------------------------------------------------------------------===// void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Widen node result " << ResNo << ": "; - N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG); + dbgs() << "\n"); // See if the target wants to custom widen this node. if (CustomWidenLowerNode(N, N->getValueType(ResNo))) @@ -2948,6 +3029,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { SDValue Mask = N->getMask(); EVT MaskVT = Mask.getValueType(); SDValue Src0 = GetWidenedVector(N->getValue()); + SDValue Scale = N->getScale(); unsigned NumElts = WideVT.getVectorNumElements(); SDLoc dl(N); @@ -2963,7 +3045,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { Index.getValueType().getScalarType(), NumElts); Index = ModifyToType(Index, WideIndexVT); - SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; + SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index, Scale }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), N->getMemoryVT(), dl, Ops, N->getMemOperand()); @@ -3309,9 +3391,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { // Widen Vector Operand //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { - DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; - N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG); + dbgs() << "\n"); SDValue Res = SDValue(); // See if the target wants to custom widen this node. @@ -3420,7 +3501,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { // low lanes. switch (N->getOpcode()) { default: - llvm_unreachable("Extend legalization on on extend operation!"); + llvm_unreachable("Extend legalization on extend operation!"); case ISD::ANY_EXTEND: return DAG.getAnyExtendVectorInReg(InOp, DL, VT); case ISD::SIGN_EXTEND: @@ -3544,6 +3625,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { // vector type. 
StoreSDNode *ST = cast<StoreSDNode>(N); + if (!ST->getMemoryVT().getScalarType().isByteSized()) + return TLI.scalarizeVectorStore(ST, DAG); + SmallVector<SDValue, 16> StChain; if (ST->isTruncatingStore()) GenWidenVectorTruncStores(StChain, ST); @@ -3587,6 +3671,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { SDValue DataOp = MSC->getValue(); SDValue Mask = MSC->getMask(); EVT MaskVT = Mask.getValueType(); + SDValue Scale = MSC->getScale(); // Widen the value. SDValue WideVal = GetWidenedVector(DataOp); @@ -3606,7 +3691,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { NumElts); Index = ModifyToType(Index, WideIndexVT); - SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index}; + SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index, + Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), dl, Ops, MSC->getMemOperand()); @@ -3616,6 +3702,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue InOp0 = GetWidenedVector(N->getOperand(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDLoc dl(N); + EVT VT = N->getValueType(0); // WARNING: In this code we widen the compare instruction with garbage. // This garbage may contain denormal floats which may be slow. Is this a real @@ -3625,18 +3712,23 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { // Only some of the compared elements are legal. EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), InOp0.getValueType()); + // The result type is legal, if its vXi1, keep vXi1 for the new SETCC. + if (VT.getScalarType() == MVT::i1) + SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + SVT.getVectorNumElements()); + SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N), - SVT, InOp0, InOp1, N->getOperand(2)); + SVT, InOp0, InOp1, N->getOperand(2)); // Extract the needed results from the result vector. EVT ResVT = EVT::getVectorVT(*DAG.getContext(), SVT.getVectorElementType(), - N->getValueType(0).getVectorNumElements()); + VT.getVectorNumElements()); SDValue CC = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - return PromoteTargetBoolean(CC, N->getValueType(0)); + return PromoteTargetBoolean(CC, VT); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index cf92907a8b5f..7e6b57426338 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -32,7 +32,8 @@ public: enum DbgValueKind { SDNODE = 0, ///< Value is the result of an expression. CONST = 1, ///< Value is a constant. - FRAMEIX = 2 ///< Value is contents of a stack location. + FRAMEIX = 2, ///< Value is contents of a stack location. + VREG = 3 ///< Value is a virtual register. }; private: union { @@ -42,6 +43,7 @@ private: } s; const Value *Const; ///< Valid for constants. unsigned FrameIx; ///< Valid for stack objects. + unsigned VReg; ///< Valid for registers. } u; DIVariable *Var; DIExpression *Expr; @@ -69,12 +71,18 @@ public: u.Const = C; } - /// Constructor for frame indices. - SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned FI, DebugLoc dl, - unsigned O) - : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) { - kind = FRAMEIX; - u.FrameIx = FI; + /// Constructor for virtual registers and frame indices. 
+  SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned VRegOrFrameIdx,
+             bool IsIndirect, DebugLoc DL, unsigned Order,
+             enum DbgValueKind Kind)
+      : Var(Var), Expr(Expr), DL(DL), Order(Order), IsIndirect(IsIndirect) {
+    assert((Kind == VREG || Kind == FRAMEIX) &&
+           "Invalid SDDbgValue constructor");
+    kind = Kind;
+    if (kind == VREG)
+      u.VReg = VRegOrFrameIdx;
+    else
+      u.FrameIx = VRegOrFrameIdx;
   }
 
   /// Returns the kind.
@@ -98,6 +106,9 @@ public:
   /// Returns the FrameIx for a stack object
   unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; }
 
+  /// Returns the Virtual Register for a VReg
+  unsigned getVReg() const { assert (kind==VREG); return u.VReg; }
+
   /// Returns whether this is an indirect value.
   bool isIndirect() const { return IsIndirect; }
 
@@ -115,6 +126,28 @@ public:
   bool isInvalidated() const { return Invalid; }
 };
 
+/// Holds the information from a dbg_label node through SDISel.
+/// We do not use SDValue here to avoid including its header.
+class SDDbgLabel {
+  MDNode *Label;
+  DebugLoc DL;
+  unsigned Order;
+
+public:
+  SDDbgLabel(MDNode *Label, DebugLoc dl, unsigned O)
+      : Label(Label), DL(std::move(dl)), Order(O) {}
+
+  /// Returns the MDNode pointer for the label.
+  MDNode *getLabel() const { return Label; }
+
+  /// Returns the DebugLoc.
+  DebugLoc getDebugLoc() const { return DL; }
+
+  /// Returns the SDNodeOrder. This is the order of the preceding node in the
+  /// input.
+  unsigned getOrder() const { return Order; }
+};
+
 } // end llvm namespace
 
 #endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 698e14453d1d..3944d7df286d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -13,6 +13,7 @@
 
 #include "InstrEmitter.h"
 #include "ScheduleDAGSDNodes.h"
+#include "SDNodeDbgValue.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
@@ -115,7 +116,7 @@ private:
 
 /// Schedule - Schedule the DAG using list scheduling.
 void ScheduleDAGFast::Schedule() {
-  DEBUG(dbgs() << "********** List Scheduling **********\n");
+  LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");
 
   NumLiveRegs = 0;
   LiveRegDefs.resize(TRI->getNumRegs(), nullptr);
@@ -124,8 +125,8 @@ void ScheduleDAGFast::Schedule() {
   // Build the scheduling graph.
   BuildSchedGraph(nullptr);
 
-  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
-          SUnits[su].dumpAll(this));
+  LLVM_DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su]
+                 .dumpAll(this));
 
   // Execute the actual scheduling loop.
   ListScheduleBottomUp();
@@ -180,8 +181,8 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
 /// count of its predecessors. If a predecessor pending count is zero, add it to
 /// the Available queue.
void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { - DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); - DEBUG(SU->dump(this)); + LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + LLVM_DEBUG(SU->dump(this)); assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); SU->setHeightToAtLeast(CurCycle); @@ -236,7 +237,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return nullptr; - DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); + LLVM_DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); N = NewNodes[1]; @@ -346,7 +347,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { SU = NewSU; } - DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); + LLVM_DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); NewSU = Clone(SU); // New SUnit has the exact same predecessors. @@ -592,14 +593,14 @@ void ScheduleDAGFast::ListScheduleBottomUp() { // Issue copies, these can be expensive cross register class copies. SmallVector<SUnit*, 2> Copies; InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); - DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum - << " to SU #" << Copies.front()->NodeNum << "\n"); + LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); NewDef = Copies.back(); } - DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum - << " to SU #" << TrySU->NodeNum << "\n"); + LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; AddPred(NewDef, SDep(TrySU, SDep::Artificial)); TrySU->isAvailable = false; @@ -666,8 +667,8 @@ void ScheduleDAGLinearize::ScheduleNode(SDNode *N) { // These nodes do not need to be translated into MIs. return; - DEBUG(dbgs() << "\n*** Scheduling: "); - DEBUG(N->dump(DAG)); + LLVM_DEBUG(dbgs() << "\n*** Scheduling: "); + LLVM_DEBUG(N->dump(DAG)); Sequence.push_back(N); unsigned NumOps = N->getNumOperands(); @@ -713,7 +714,7 @@ static SDNode *findGluedUser(SDNode *N) { } void ScheduleDAGLinearize::Schedule() { - DEBUG(dbgs() << "********** DAG Linearization **********\n"); + LLVM_DEBUG(dbgs() << "********** DAG Linearization **********\n"); SmallVector<SDNode*, 8> Glues; unsigned DAGSize = 0; @@ -763,19 +764,29 @@ ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(BB, InsertPos); DenseMap<SDValue, unsigned> VRBaseMap; - DEBUG({ - dbgs() << "\n*** Final schedule ***\n"; - }); + LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; }); - // FIXME: Handle dbg_values. unsigned NumNodes = Sequence.size(); + MachineBasicBlock *BB = Emitter.getBlock(); for (unsigned i = 0; i != NumNodes; ++i) { SDNode *N = Sequence[NumNodes-i-1]; - DEBUG(N->dump(DAG)); + LLVM_DEBUG(N->dump(DAG)); Emitter.EmitNode(N, false, false, VRBaseMap); + + // Emit any debug values associated with the node. 
+    if (N->getHasDebugValue()) {
+      MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
+      for (auto DV : DAG->GetDbgValues(N)) {
+        if (DV->isInvalidated())
+          continue;
+        if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap))
+          BB->insert(InsertPos, DbgMI);
+        DV->setIsInvalidated();
+      }
+    }
   }
 
-  DEBUG(dbgs() << '\n');
+  LLVM_DEBUG(dbgs() << '\n');
 
   InsertPos = Emitter.getInsertPos();
   return Emitter.getBlock();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 82337d43c5c9..43e8ffd3839c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -26,7 +26,6 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
@@ -37,6 +36,7 @@
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCRegisterInfo.h"
@@ -46,6 +46,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
@@ -346,8 +347,8 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
 
 /// Schedule - Schedule the DAG using list scheduling.
 void ScheduleDAGRRList::Schedule() {
-  DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
-               << " '" << BB->getName() << "' **********\n");
+  LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
+                    << " '" << BB->getName() << "' **********\n");
 
   CurCycle = 0;
   IssueCount = 0;
@@ -364,8 +365,7 @@ void ScheduleDAGRRList::Schedule() {
   // Build the scheduling graph.
   BuildSchedGraph(nullptr);
 
-  DEBUG(for (SUnit &SU : SUnits)
-          SU.dumpAll(this));
+  LLVM_DEBUG(for (SUnit &SU : SUnits) SU.dumpAll(this));
   Topo.InitDAGTopologicalSorting();
 
   AvailableQueue->initNodes(SUnits);
@@ -377,11 +377,11 @@ void ScheduleDAGRRList::Schedule() {
 
   AvailableQueue->releaseState();
 
-  DEBUG({
-    dbgs() << "*** Final schedule ***\n";
-    dumpSchedule();
-    dbgs() << '\n';
-  });
+  LLVM_DEBUG({
+    dbgs() << "*** Final schedule ***\n";
+    dumpSchedule();
+    dbgs() << '\n';
+  });
 }
 
 //===----------------------------------------------------------------------===//
@@ -728,13 +728,13 @@ static void resetVRegCycle(SUnit *SU);
 /// count of its predecessors. If a predecessor pending count is zero, add it to
 /// the Available queue.
 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
-  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
-  DEBUG(SU->dump(this));
+  LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
+  LLVM_DEBUG(SU->dump(this));
 
 #ifndef NDEBUG
   if (CurCycle < SU->getHeight())
-    DEBUG(dbgs() << " Height [" << SU->getHeight()
-          << "] pipeline stall!\n");
+    LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight()
+                      << "] pipeline stall!\n");
 #endif
 
   // FIXME: Do not modify node height.
It may interfere with @@ -827,8 +827,8 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and /// its predecessor states to reflect the change. void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { - DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); - DEBUG(SU->dump(this)); + LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); + LLVM_DEBUG(SU->dump(this)); for (SDep &Pred : SU->Preds) { CapturePred(&Pred); @@ -1010,7 +1010,35 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { computeLatency(LoadSU); } - DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); + bool isNewN = true; + SUnit *NewSU; + // This can only happen when isNewLoad is false. + if (N->getNodeId() != -1) { + NewSU = &SUnits[N->getNodeId()]; + // If NewSU has already been scheduled, we need to clone it, but this + // negates the benefit to unfolding so just return SU. + if (NewSU->isScheduled) + return SU; + isNewN = false; + } else { + NewSU = CreateNewSUnit(N); + N->setNodeId(NewSU->NodeNum); + + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (MCID.isCommutable()) + NewSU->isCommutable = true; + + InitNumRegDefsLeft(NewSU); + computeLatency(NewSU); + } + + LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); // Now that we are committed to unfolding replace DAG Uses. for (unsigned i = 0; i != NumVals; ++i) @@ -1018,23 +1046,6 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1), SDValue(LoadNode, 1)); - SUnit *NewSU = CreateNewSUnit(N); - assert(N->getNodeId() == -1 && "Node already inserted!"); - N->setNodeId(NewSU->NodeNum); - - const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { - if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { - NewSU->isTwoAddress = true; - break; - } - } - if (MCID.isCommutable()) - NewSU->isCommutable = true; - - InitNumRegDefsLeft(NewSU); - computeLatency(NewSU); - // Record all the edges to and from the old SU, by category. 
SmallVector<SDep, 4> ChainPreds; SmallVector<SDep, 4> ChainSuccs; @@ -1100,7 +1111,8 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { if (isNewLoad) AvailableQueue->addNode(LoadSU); - AvailableQueue->addNode(NewSU); + if (isNewN) + AvailableQueue->addNode(NewSU); ++NumUnfolds; @@ -1117,12 +1129,13 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!N) return nullptr; - DEBUG(dbgs() << "Considering duplicating the SU\n"); - DEBUG(SU->dump(this)); + LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n"); + LLVM_DEBUG(SU->dump(this)); if (N->getGluedNode() && !TII->canCopyGluedNodeDuringSchedule(N)) { - DEBUG(dbgs() + LLVM_DEBUG( + dbgs() << "Giving up because it has incoming glue and the target does not " "want to copy it\n"); return nullptr; @@ -1133,7 +1146,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { MVT VT = N->getSimpleValueType(i); if (VT == MVT::Glue) { - DEBUG(dbgs() << "Giving up because it has outgoing glue\n"); + LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n"); return nullptr; } else if (VT == MVT::Other) TryUnfold = true; @@ -1141,8 +1154,9 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { for (const SDValue &Op : N->op_values()) { MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) { - DEBUG(dbgs() << "Giving up because it one of the operands is glue and " - "the target does not want to copy it\n"); + LLVM_DEBUG( + dbgs() << "Giving up because it one of the operands is glue and " + "the target does not want to copy it\n"); return nullptr; } } @@ -1159,7 +1173,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { return SU; } - DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); + LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); NewSU = CreateClone(SU); // New SUnit has the exact same predecessors. @@ -1420,7 +1434,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { // Furthermore, it may have been made available again, in which case it is // now already in the AvailableQueue. 
if (SU->isAvailable && !SU->NodeQueueId) { - DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n'); + LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n'); AvailableQueue->push(SU); } if (i < Interferences.size()) @@ -1441,12 +1455,10 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { SmallVector<unsigned, 4> LRegs; if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) break; - DEBUG(dbgs() << " Interfering reg "; - if (LRegs[0] == TRI->getNumRegs()) - dbgs() << "CallResource"; - else - dbgs() << printReg(LRegs[0], TRI); - dbgs() << " SU #" << CurSU->NodeNum << '\n'); + LLVM_DEBUG(dbgs() << " Interfering reg "; + if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource"; + else dbgs() << printReg(LRegs[0], TRI); + dbgs() << " SU #" << CurSU->NodeNum << '\n'); std::pair<LRegsMapT::iterator, bool> LRegsPair = LRegsMap.insert(std::make_pair(CurSU, LRegs)); if (LRegsPair.second) { @@ -1492,17 +1504,17 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { if (!BtSU->isPending) AvailableQueue->remove(BtSU); } - DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU(" - << TrySU->NodeNum << ")\n"); + LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum + << ") to SU(" << TrySU->NodeNum << ")\n"); AddPred(TrySU, SDep(BtSU, SDep::Artificial)); // If one or more successors has been unscheduled, then the current // node is no longer available. if (!TrySU->isAvailable || !TrySU->NodeQueueId) { - DEBUG(dbgs() << "TrySU not available; choosing node from queue\n"); + LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n"); CurSU = AvailableQueue->pop(); } else { - DEBUG(dbgs() << "TrySU available\n"); + LLVM_DEBUG(dbgs() << "TrySU available\n"); // Available and in AvailableQueue AvailableQueue->remove(TrySU); CurSU = TrySU; @@ -1546,14 +1558,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // Issue copies, these can be expensive cross register class copies. SmallVector<SUnit*, 2> Copies; InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); - DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum - << " to SU #" << Copies.front()->NodeNum << "\n"); + LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); NewDef = Copies.back(); } - DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum - << " to SU #" << TrySU->NodeNum << "\n"); + LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; AddPred(NewDef, SDep(TrySU, SDep::Artificial)); TrySU->isAvailable = false; @@ -1581,8 +1593,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // priority. If it is not ready put it back. Schedule the node. Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty() || !Interferences.empty()) { - DEBUG(dbgs() << "\nExamining Available:\n"; - AvailableQueue->dump(this)); + LLVM_DEBUG(dbgs() << "\nExamining Available:\n"; + AvailableQueue->dump(this)); // Pick the best node to schedule taking all constraints into // consideration. 
@@ -2045,8 +2057,8 @@ LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const { unsigned Id = RC->getID(); unsigned RP = RegPressure[Id]; if (!RP) continue; - DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / " - << RegLimit[Id] << '\n'); + LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / " + << RegLimit[Id] << '\n'); } } #endif @@ -2198,14 +2210,15 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) { if (RegPressure[RCId] < Cost) { // Register pressure tracking is imprecise. This can happen. But we try // hard not to let it happen because it likely results in poor scheduling. - DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); + LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum + << ") has too many regdefs\n"); RegPressure[RCId] = 0; } else { RegPressure[RCId] -= Cost; } } - DEBUG(dumpRegPressure()); + LLVM_DEBUG(dumpRegPressure()); } void RegReductionPQBase::unscheduledNode(SUnit *SU) { @@ -2285,7 +2298,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) { } } - DEBUG(dumpRegPressure()); + LLVM_DEBUG(dumpRegPressure()); } //===----------------------------------------------------------------------===// @@ -2380,7 +2393,7 @@ static void initVRegCycle(SUnit *SU) { if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) return; - DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); + LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); SU->isVRegCycle = true; @@ -2418,7 +2431,7 @@ static bool hasVRegCycleUse(const SUnit *SU) { if (Pred.isCtrl()) continue; // ignore chain preds if (Pred.getSUnit()->isVRegCycle && Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { - DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); + LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); return true; } } @@ -2478,9 +2491,9 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, int LDepth = left->getDepth() - LPenalty; int RDepth = right->getDepth() - RPenalty; if (LDepth != RDepth) { - DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum - << ") depth " << LDepth << " vs SU (" << right->NodeNum - << ") depth " << RDepth << "\n"); + LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum + << ") depth " << LDepth << " vs SU (" << right->NodeNum + << ") depth " << RDepth << "\n"); return LDepth < RDepth ? 1 : -1; } if (left->Latency != right->Latency) @@ -2502,9 +2515,9 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { static const char *const PhysRegMsg[] = { " has no physreg", " defines a physreg" }; #endif - DEBUG(dbgs() << " SU (" << left->NodeNum << ") " - << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " - << PhysRegMsg[RHasPhysReg] << "\n"); + LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") " + << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum + << ") " << PhysRegMsg[RHasPhysReg] << "\n"); return LHasPhysReg < RHasPhysReg; } } @@ -2648,13 +2661,13 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { // Avoid causing spills. If register pressure is high, schedule for // register pressure reduction. 
if (LHigh && !RHigh) { - DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" - << right->NodeNum << ")\n"); + LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" + << right->NodeNum << ")\n"); return true; } else if (!LHigh && RHigh) { - DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" - << left->NodeNum << ")\n"); + LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" + << left->NodeNum << ")\n"); return false; } if (!LHigh && !RHigh) { @@ -2716,8 +2729,9 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { RPDiff = SPQ->RegPressureDiff(right, RLiveUses); } if (!DisableSchedRegPressure && LPDiff != RPDiff) { - DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff - << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); + LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum + << "): " << LPDiff << " != SU(" << right->NodeNum + << "): " << RPDiff << "\n"); return LPDiff > RPDiff; } @@ -2729,8 +2743,9 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { } if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { - DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses - << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); + LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses + << " != SU(" << right->NodeNum << "): " << RLiveUses + << "\n"); return LLiveUses < RLiveUses; } @@ -2744,9 +2759,9 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { if (!DisableSchedCriticalPath) { int spread = (int)left->getDepth() - (int)right->getDepth(); if (std::abs(spread) > MaxReorderWindow) { - DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " - << left->getDepth() << " != SU(" << right->NodeNum << "): " - << right->getDepth() << "\n"); + LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " + << left->getDepth() << " != SU(" << right->NodeNum + << "): " << right->getDepth() << "\n"); return left->getDepth() < right->getDepth(); } } @@ -2967,9 +2982,10 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // Ok, the transformation is safe and the heuristics suggest it is // profitable. Update the graph. 
- DEBUG(dbgs() << " Prescheduling SU #" << SU.NodeNum - << " next to PredSU #" << PredSU->NodeNum - << " to guide scheduling in the presence of multiple uses\n"); + LLVM_DEBUG( + dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #" + << PredSU->NodeNum + << " to guide scheduling in the presence of multiple uses\n"); for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { SDep Edge = PredSU->Succs[i]; assert(!Edge.isAssignedRegDep()); @@ -3058,8 +3074,9 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || (!SU.isCommutable && SuccSU->isCommutable)) && !scheduleDAG->IsReachable(SuccSU, &SU)) { - DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" - << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); + LLVM_DEBUG(dbgs() + << " Adding a pseudo-two-addr edge from SU #" + << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial)); } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index c09b47af26a6..430d8fb34476 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -243,7 +244,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { return; // Sort them in increasing order. - std::sort(Offsets.begin(), Offsets.end()); + llvm::sort(Offsets.begin(), Offsets.end()); // Check if the loads are close enough. SmallVector<SDNode*, 4> Loads; @@ -910,6 +911,39 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { MachineBasicBlock *InsertBB = Emitter.getBlock(); MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator(); InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end()); + + SDDbgInfo::DbgLabelIterator DLI = DAG->DbgLabelBegin(); + SDDbgInfo::DbgLabelIterator DLE = DAG->DbgLabelEnd(); + // Now emit the rest according to source order. + LastOrder = 0; + for (const auto &InstrOrder : Orders) { + unsigned Order = InstrOrder.first; + MachineInstr *MI = InstrOrder.second; + if (!MI) + continue; + + // Insert all SDDbgLabel's whose order(s) are before "Order". + for (; DLI != DLE && + (*DLI)->getOrder() >= LastOrder && (*DLI)->getOrder() < Order; + ++DLI) { + MachineInstr *DbgMI = Emitter.EmitDbgLabel(*DLI); + if (DbgMI) { + if (!LastOrder) + // Insert to start of the BB (after PHIs). + BB->insert(BBBegin, DbgMI); + else { + // Insert at the instruction, which may be in a different + // block, if the block was split by a custom inserter. 
+ MachineBasicBlock::iterator Pos = MI; + MI->getParent()->insert(Pos, DbgMI); + } + } + } + if (DLI == DLE) + break; + + LastOrder = Order; + } } InsertPos = Emitter.getInsertPos(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index a058942c5689..6417e16bd0fd 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -17,10 +17,10 @@ #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/MachineValueType.h" #include <cassert> #include <string> #include <vector> @@ -88,7 +88,7 @@ class InstrItineraryData; /// Clone - Creates a clone of the specified SUnit. It does not copy the /// predecessors / successors info nor the temporary scheduling states. /// - SUnit *Clone(SUnit *N); + SUnit *Clone(SUnit *Old); /// BuildSchedGraph - Build the SUnit graph from the selection dag that we /// are input. This SUnit graph is similar to the SelectionDAG, but diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 07b46b9183ab..84055f8ecc1a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -93,8 +93,8 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGVLIW::Schedule() { - DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) - << " '" << BB->getName() << "' **********\n"); + LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) + << " '" << BB->getName() << "' **********\n"); // Build the scheduling graph. BuildSchedGraph(AA); @@ -151,8 +151,8 @@ void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) { /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { - DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); - DEBUG(SU->dump(this)); + LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + LLVM_DEBUG(SU->dump(this)); Sequence.push_back(SU); assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); @@ -246,7 +246,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() { } else if (!HasNoopHazards) { // Otherwise, we have a pipeline stall, but no other problem, just advance // the current cycle and try again. - DEBUG(dbgs() << "*** Advancing cycle, no work to do\n"); + LLVM_DEBUG(dbgs() << "*** Advancing cycle, no work to do\n"); HazardRec->AdvanceCycle(); ++NumStalls; ++CurCycle; @@ -254,7 +254,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() { // Otherwise, we have no instructions to issue and we have instructions // that will fault if we don't do this right. This is the case for // processors without pipeline interlocks and other cases. 
- DEBUG(dbgs() << "*** Emitting noop\n"); + LLVM_DEBUG(dbgs() << "*** Emitting noop\n"); HazardRec->EmitNoop(); Sequence.push_back(nullptr); // NULL here means noop ++NumNoops; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 3ffc6fa9a059..48e03c6da68f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -32,7 +32,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -58,6 +57,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Mutex.h" @@ -89,11 +89,16 @@ void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} #define DEBUG_TYPE "selectiondag" +static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt", + cl::Hidden, cl::init(true), + cl::desc("Gang up loads and stores generated by inlining of memcpy")); + +static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max", + cl::desc("Number limit for gluing ld/st of memcpy."), + cl::Hidden, cl::init(0)); + static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) { - DEBUG( - dbgs() << Msg; - V.getNode()->dump(G); - ); + LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G);); } //===----------------------------------------------------------------------===// @@ -263,6 +268,52 @@ bool ISD::allOperandsUndef(const SDNode *N) { return true; } +bool ISD::matchUnaryPredicate(SDValue Op, + std::function<bool(ConstantSDNode *)> Match) { + if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) + return Match(Cst); + + if (ISD::BUILD_VECTOR != Op.getOpcode()) + return false; + + EVT SVT = Op.getValueType().getScalarType(); + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i)); + if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst)) + return false; + } + return true; +} + +bool ISD::matchBinaryPredicate( + SDValue LHS, SDValue RHS, + std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) { + if (LHS.getValueType() != RHS.getValueType()) + return false; + + if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS)) + if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS)) + return Match(LHSCst, RHSCst); + + if (ISD::BUILD_VECTOR != LHS.getOpcode() || + ISD::BUILD_VECTOR != RHS.getOpcode()) + return false; + + EVT SVT = LHS.getValueType().getScalarType(); + for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { + auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i)); + auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i)); + if (!LHSCst || !RHSCst) + return false; + if (LHSCst->getValueType(0) != SVT || + LHSCst->getValueType(0) != RHSCst->getValueType(0)) + return false; + if (!Match(LHSCst, RHSCst)) + return false; + } + return true; +} + ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) { switch (ExtType) { case ISD::EXTLOAD: @@ -487,12 +538,41 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ST->getPointerInfo().getAddrSpace()); break; } + case ISD::MLOAD: { + const MaskedLoadSDNode *MLD = 
cast<MaskedLoadSDNode>(N); + ID.AddInteger(MLD->getMemoryVT().getRawBits()); + ID.AddInteger(MLD->getRawSubclassData()); + ID.AddInteger(MLD->getPointerInfo().getAddrSpace()); + break; + } + case ISD::MSTORE: { + const MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); + ID.AddInteger(MST->getMemoryVT().getRawBits()); + ID.AddInteger(MST->getRawSubclassData()); + ID.AddInteger(MST->getPointerInfo().getAddrSpace()); + break; + } + case ISD::MGATHER: { + const MaskedGatherSDNode *MG = cast<MaskedGatherSDNode>(N); + ID.AddInteger(MG->getMemoryVT().getRawBits()); + ID.AddInteger(MG->getRawSubclassData()); + ID.AddInteger(MG->getPointerInfo().getAddrSpace()); + break; + } + case ISD::MSCATTER: { + const MaskedScatterSDNode *MS = cast<MaskedScatterSDNode>(N); + ID.AddInteger(MS->getMemoryVT().getRawBits()); + ID.AddInteger(MS->getRawSubclassData()); + ID.AddInteger(MS->getPointerInfo().getAddrSpace()); + break; + } case ISD::ATOMIC_CMP_SWAP: case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: case ISD::ATOMIC_SWAP: case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_NAND: @@ -726,7 +806,7 @@ static void VerifySDNode(SDNode *N) { } #endif // NDEBUG -/// \brief Insert a newly allocated node into the DAG. +/// Insert a newly allocated node into the DAG. /// /// Handles insertion into the all nodes list and CSE map, as well as /// verification and other common operations when a new node is allocated. @@ -903,13 +983,16 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) void SelectionDAG::init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, - Pass *PassPtr) { + Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, + DivergenceAnalysis * Divergence) { MF = &NewMF; SDAGISelPass = PassPtr; ORE = &NewORE; TLI = getSubtarget().getTargetLowering(); TSI = getSubtarget().getSelectionDAGInfo(); + LibInfo = LibraryInfo; Context = &MF->getFunction().getContext(); + DA = Divergence; } SelectionDAG::~SelectionDAG() { @@ -1077,21 +1160,25 @@ SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { } SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) { - EVT EltVT = VT.getScalarType(); - SDValue TrueValue; - switch (TLI->getBooleanContents(VT)) { - case TargetLowering::ZeroOrOneBooleanContent: - case TargetLowering::UndefinedBooleanContent: - TrueValue = getConstant(1, DL, VT); - break; - case TargetLowering::ZeroOrNegativeOneBooleanContent: - TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, - VT); - break; - } + SDValue TrueValue = getBoolConstant(true, DL, VT, VT); return getNode(ISD::XOR, DL, VT, Val, TrueValue); } +SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT, + EVT OpVT) { + if (!V) + return getConstant(0, DL, VT); + + switch (TLI->getBooleanContents(OpVT)) { + case TargetLowering::ZeroOrOneBooleanContent: + case TargetLowering::UndefinedBooleanContent: + return getConstant(1, DL, VT); + case TargetLowering::ZeroOrNegativeOneBooleanContent: + return getAllOnesConstant(DL, VT); + } + llvm_unreachable("Unexpected boolean content enum!"); +} + SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isT, bool isO) { EVT EltVT = VT.getScalarType(); @@ -1184,7 +1271,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, return SDValue(N, 0); if (!N) { - N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT); + N = 
newSDNode<ConstantSDNode>(isT, isO, Elt, EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this); @@ -1227,7 +1314,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, return SDValue(N, 0); if (!N) { - N = newSDNode<ConstantFPSDNode>(isTarget, &V, DL.getDebugLoc(), EltVT); + N = newSDNode<ConstantFPSDNode>(isTarget, &V, EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); } @@ -1503,33 +1590,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, if (N1.isUndef()) commuteShuffle(N1, N2, MaskVec); - // If shuffling a splat, try to blend the splat instead. We do this here so - // that even when this arises during lowering we don't have to re-handle it. - auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { - BitVector UndefElements; - SDValue Splat = BV->getSplatValue(&UndefElements); - if (!Splat) - return; + if (TLI->hasVectorBlend()) { + // If shuffling a splat, try to blend the splat instead. We do this here so + // that even when this arises during lowering we don't have to re-handle it. + auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { + BitVector UndefElements; + SDValue Splat = BV->getSplatValue(&UndefElements); + if (!Splat) + return; - for (int i = 0; i < NElts; ++i) { - if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts)) - continue; + for (int i = 0; i < NElts; ++i) { + if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts)) + continue; - // If this input comes from undef, mark it as such. - if (UndefElements[MaskVec[i] - Offset]) { - MaskVec[i] = -1; - continue; - } + // If this input comes from undef, mark it as such. + if (UndefElements[MaskVec[i] - Offset]) { + MaskVec[i] = -1; + continue; + } - // If we can blend a non-undef lane, use that instead. - if (!UndefElements[i]) - MaskVec[i] = i + Offset; - } - }; - if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) - BlendSplat(N1BV, 0); - if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) - BlendSplat(N2BV, NElts); + // If we can blend a non-undef lane, use that instead. + if (!UndefElements[i]) + MaskVec[i] = i + Offset; + } + }; + if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) + BlendSplat(N1BV, 0); + if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) + BlendSplat(N2BV, NElts); + } // Canonicalize all index into lhs, -> shuffle lhs, undef // Canonicalize all index into rhs, -> shuffle rhs, undef @@ -1643,7 +1732,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, } SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { - MVT VT = SV.getSimpleValueType(0); + EVT VT = SV.getValueType(0); SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end()); ShuffleVectorSDNode::commuteMask(MaskVec); @@ -1661,6 +1750,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { return SDValue(E, 0); auto *N = newSDNode<RegisterSDNode>(RegNo, VT); + N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1870,19 +1960,15 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl) { + EVT OpVT = N1.getValueType(); + // These setcc operations always fold. 
switch (Cond) { default: break; case ISD::SETFALSE: - case ISD::SETFALSE2: return getConstant(0, dl, VT); + case ISD::SETFALSE2: return getBoolConstant(false, dl, VT, OpVT); case ISD::SETTRUE: - case ISD::SETTRUE2: { - TargetLowering::BooleanContent Cnt = - TLI->getBooleanContents(N1->getValueType(0)); - return getConstant( - Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl, - VT); - } + case ISD::SETTRUE2: return getBoolConstant(true, dl, VT, OpVT); case ISD::SETOEQ: case ISD::SETOGT: @@ -1905,16 +1991,16 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, switch (Cond) { default: llvm_unreachable("Unknown integer setcc!"); - case ISD::SETEQ: return getConstant(C1 == C2, dl, VT); - case ISD::SETNE: return getConstant(C1 != C2, dl, VT); - case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT); - case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT); - case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT); - case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT); - case ISD::SETLT: return getConstant(C1.slt(C2), dl, VT); - case ISD::SETGT: return getConstant(C1.sgt(C2), dl, VT); - case ISD::SETLE: return getConstant(C1.sle(C2), dl, VT); - case ISD::SETGE: return getConstant(C1.sge(C2), dl, VT); + case ISD::SETEQ: return getBoolConstant(C1 == C2, dl, VT, OpVT); + case ISD::SETNE: return getBoolConstant(C1 != C2, dl, VT, OpVT); + case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT); + case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT); + case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT); + case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT); + case ISD::SETLT: return getBoolConstant(C1.slt(C2), dl, VT, OpVT); + case ISD::SETGT: return getBoolConstant(C1.sgt(C2), dl, VT, OpVT); + case ISD::SETLE: return getBoolConstant(C1.sle(C2), dl, VT, OpVT); + case ISD::SETGE: return getBoolConstant(C1.sge(C2), dl, VT, OpVT); } } } @@ -1926,41 +2012,54 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, case ISD::SETEQ: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT); + case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT, + OpVT); case ISD::SETNE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpLessThan, dl, VT); + case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpLessThan, dl, VT, + OpVT); case ISD::SETLT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT); + case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT, + OpVT); case ISD::SETGT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT); + case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl, + VT, OpVT); case ISD::SETLE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || - R==APFloat::cmpEqual, dl, VT); + case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan || + R==APFloat::cmpEqual, dl, VT, + OpVT); case ISD::SETGE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || - 
R==APFloat::cmpEqual, dl, VT); - case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT); - case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, dl, VT); - case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered || - R==APFloat::cmpEqual, dl, VT); - case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT); - case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered || - R==APFloat::cmpLessThan, dl, VT); - case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpUnordered, dl, VT); - case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT); - case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT); + case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpEqual, dl, VT, OpVT); + case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpLessThan, dl, VT, + OpVT); + case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl, + VT, OpVT); + case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT, + OpVT); } } else { // Ensure that the constant occurs on the RHS. @@ -2297,10 +2396,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; } - // Support big-endian targets when it becomes useful. bool IsLE = getDataLayout().isLittleEndian(); - if (!IsLE) - break; // Bitcast 'small element' vector to 'large element' scalar/vector. if ((BitWidth % SubBitWidth) == 0) { @@ -2319,8 +2415,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, for (unsigned i = 0; i != SubScale; ++i) { computeKnownBits(N0, Known2, SubDemandedElts.shl(i), Depth + 1); - Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i); - Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i); + unsigned Shifts = IsLE ? i : SubScale - 1 - i; + Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts); + Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * Shifts); } } @@ -2342,7 +2439,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0; i != NumElts; ++i) if (DemandedElts[i]) { - unsigned Offset = (i % SubScale) * BitWidth; + unsigned Shifts = IsLE ? i : NumElts - 1 - i; + unsigned Offset = (Shifts % SubScale) * BitWidth; Known.One &= Known2.One.lshr(Offset).trunc(BitWidth); Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth); // If we don't know any bits, early out. @@ -2441,6 +2539,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; case ISD::SMULO: case ISD::UMULO: + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: if (Op.getResNo() != 1) break; // The boolean result conforms to getBooleanContents. @@ -2904,11 +3003,38 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, } case ISD::SMIN: case ISD::SMAX: { - computeKnownBits(Op.getOperand(0), Known, DemandedElts, - Depth + 1); - // If we don't know any bits, early out. 
- if (Known.isUnknown()) - break; + // If we have a clamp pattern, we know that the number of sign bits will be + // the minimum of the clamp min/max range. + bool IsMax = (Opcode == ISD::SMAX); + ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; + if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts))) + if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) + CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1), + DemandedElts); + if (CstLow && CstHigh) { + if (!IsMax) + std::swap(CstLow, CstHigh); + + const APInt &ValueLow = CstLow->getAPIntValue(); + const APInt &ValueHigh = CstHigh->getAPIntValue(); + if (ValueLow.sle(ValueHigh)) { + unsigned LowSignBits = ValueLow.getNumSignBits(); + unsigned HighSignBits = ValueHigh.getNumSignBits(); + unsigned MinSignBits = std::min(LowSignBits, HighSignBits); + if (ValueLow.isNegative() && ValueHigh.isNegative()) { + Known.One.setHighBits(MinSignBits); + break; + } + if (ValueLow.isNonNegative() && ValueHigh.isNonNegative()) { + Known.Zero.setHighBits(MinSignBits); + break; + } + } + } + + // Fallback - just get the shared known bits of the operands. + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + if (Known.isUnknown()) break; // Early-out computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); Known.Zero &= Known2.Zero; Known.One &= Known2.One; @@ -3038,7 +3164,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, if (!DemandedElts) return 1; // No demanded elts, better to assume we don't know anything. - switch (Op.getOpcode()) { + unsigned Opcode = Op.getOpcode(); + switch (Opcode) { default: break; case ISD::AssertSext: Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); @@ -3189,7 +3316,32 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return std::min(Tmp, Tmp2); case ISD::SMIN: - case ISD::SMAX: + case ISD::SMAX: { + // If we have a clamp pattern, we know that the number of sign bits will be + // the minimum of the clamp min/max range. + bool IsMax = (Opcode == ISD::SMAX); + ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; + if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts))) + if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) + CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1), + DemandedElts); + if (CstLow && CstHigh) { + if (!IsMax) + std::swap(CstLow, CstHigh); + if (CstLow->getAPIntValue().sle(CstHigh->getAPIntValue())) { + Tmp = CstLow->getAPIntValue().getNumSignBits(); + Tmp2 = CstHigh->getAPIntValue().getNumSignBits(); + return std::min(Tmp, Tmp2); + } + } + + // Fallback - just get the minimum number of sign bits of the operands. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (Tmp == 1) + return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + return std::min(Tmp, Tmp2); + } case ISD::UMIN: case ISD::UMAX: Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); @@ -3225,7 +3377,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, unsigned RotAmt = C->getAPIntValue().urem(VTBits); // Handle rotate right by N like a rotate left by 32-N. 
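The clamp folds added above, for smax(smin(x, Hi), Lo) and its smin/smax dual, rest on a simple bound: a value confined to [Lo, Hi] has at least as many sign bits as the weaker of the two endpoints. A minimal standalone C++ sketch of that arithmetic, separate from the patch and using an assumed [-100, 100] clamp:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    // Counts how many copies of the sign bit a 32-bit value carries, including
    // the sign bit itself (the same quantity APInt::getNumSignBits() reports).
    static unsigned numSignBits(int32_t V) {
      uint32_t U = static_cast<uint32_t>(V);
      uint32_t Sign = (U >> 31) & 1;
      unsigned N = 1;
      while (N < 32 && ((U >> (31 - N)) & 1) == Sign)
        ++N;
      return N;
    }

    int main() {
      // Anything clamped into [Lo, Hi] cannot have fewer sign bits than the
      // weaker endpoint, so min(signbits(Lo), signbits(Hi)) is a safe answer.
      int32_t Lo = -100, Hi = 100;
      printf("clamp to [%d, %d] guarantees %u sign bits\n", Lo, Hi,
             std::min(numSignBits(Lo), numSignBits(Hi))); // prints 25
      return 0;
    }

Both endpoints carry 25 sign bits here, so the clamp path can report 25 without examining the clamped operand at all; the same reasoning drives the known-high-bits variant added to computeKnownBits.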
- if (Op.getOpcode() == ISD::ROTR) + if (Opcode == ISD::ROTR) RotAmt = (VTBits - RotAmt) % VTBits; // If we aren't rotating out all of the known-in sign bits, return the @@ -3423,10 +3575,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, } // Allow the target to implement this method for its nodes. - if (Op.getOpcode() >= ISD::BUILTIN_OP_END || - Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || - Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || - Op.getOpcode() == ISD::INTRINSIC_VOID) { + if (Opcode >= ISD::BUILTIN_OP_END || + Opcode == ISD::INTRINSIC_WO_CHAIN || + Opcode == ISD::INTRINSIC_W_CHAIN || + Opcode == ISD::INTRINSIC_VOID) { unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth); if (NumBits > 1) @@ -3487,17 +3639,33 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { return false; } -bool SelectionDAG::isKnownNeverZero(SDValue Op) const { +bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const { + assert(Op.getValueType().isFloatingPoint() && + "Floating point type expected"); + // If the value is a constant, we can obviously see if it is a zero or not. + // TODO: Add BuildVector support. if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) return !C->isZero(); + return false; +} + +bool SelectionDAG::isKnownNeverZero(SDValue Op) const { + assert(!Op.getValueType().isFloatingPoint() && + "Floating point types unsupported - use isKnownNeverZeroFloat"); + + // If the value is a constant, we can obviously see if it is a zero or not. + if (ISD::matchUnaryPredicate( + Op, [](ConstantSDNode *C) { return !C->isNullValue(); })) + return true; // TODO: Recognize more cases here. switch (Op.getOpcode()) { default: break; case ISD::OR: - if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) - return !C->isNullValue(); + if (isKnownNeverZero(Op.getOperand(1)) || + isKnownNeverZero(Op.getOperand(0))) + return true; break; } @@ -3517,6 +3685,8 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { return false; } +// FIXME: unify with llvm::haveNoCommonBitsSet. +// FIXME: could also handle masked merge pattern (X & ~M) op (Y & M) bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { assert(A.getValueType() == B.getValueType() && "Values must have the same type"); @@ -3841,11 +4011,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, else if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); - // (ext (trunx x)) -> x + // (ext (trunc x)) -> x if (OpOpcode == ISD::TRUNCATE) { SDValue OpOp = Operand.getOperand(0); - if (OpOp.getValueType() == VT) + if (OpOp.getValueType() == VT) { + transferDbgValues(Operand, OpOp); return OpOp; + } } break; case ISD::TRUNCATE: @@ -3921,10 +4093,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; case ISD::FNEG: // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) - // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? 
+ if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) && + OpOpcode == ISD::FSUB) return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), - Operand.getOperand(0), Operand.getNode()->getFlags()); + Operand.getOperand(0), Flags); if (OpOpcode == ISD::FNEG) // --X -> X return Operand.getOperand(0); break; @@ -4314,24 +4486,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::FMUL: case ISD::FDIV: case ISD::FREM: - if (getTarget().Options.UnsafeFPMath) { - if (Opcode == ISD::FADD) { - // x+0 --> x - if (N2CFP && N2CFP->getValueAPF().isZero()) - return N1; - } else if (Opcode == ISD::FSUB) { - // x-0 --> x - if (N2CFP && N2CFP->getValueAPF().isZero()) - return N1; - } else if (Opcode == ISD::FMUL) { - // x*0 --> 0 - if (N2CFP && N2CFP->isZero()) - return N2; - // x*1 --> x - if (N2CFP && N2CFP->isExactlyValue(1.0)) - return N1; - } - } assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); @@ -4448,12 +4602,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; } case ISD::EXTRACT_VECTOR_ELT: + assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() && + "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \ + element type of the vector."); + // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF. if (N1.isUndef()) return getUNDEF(VT); // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF - if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements()) + if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements())) return getUNDEF(VT); // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is @@ -4635,6 +4793,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } } + // Any FP binop with an undef operand is folded to NaN. This matches the + // behavior of the IR optimizer. + switch (Opcode) { + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + if (N1.isUndef() || N2.isUndef()) + return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT); + } + // Canonicalize an UNDEF to the RHS, even over a constant. if (N1.isUndef()) { if (TLI->isCommutativeBinOp(Opcode)) { @@ -4644,22 +4814,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: case ISD::SUB: - case ISD::FSUB: - case ISD::FDIV: - case ISD::FREM: - case ISD::SRA: - return N1; // fold op(undef, arg2) -> undef + return getUNDEF(VT); // fold op(undef, arg2) -> undef case ISD::UDIV: case ISD::SDIV: case ISD::UREM: case ISD::SREM: + case ISD::SRA: case ISD::SRL: case ISD::SHL: - if (!VT.isVector()) - return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0 - // For vectors, we can't easily build an all zero vector, just return - // the LHS. 
- return N2; + return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0 } } } @@ -4681,32 +4844,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::SDIV: case ISD::UREM: case ISD::SREM: - return N2; // fold op(arg1, undef) -> undef - case ISD::FADD: - case ISD::FSUB: - case ISD::FMUL: - case ISD::FDIV: - case ISD::FREM: - if (getTarget().Options.UnsafeFPMath) - return N2; - break; - case ISD::MUL: - case ISD::AND: + case ISD::SRA: case ISD::SRL: case ISD::SHL: - if (!VT.isVector()) - return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0 - // For vectors, we can't easily build an all zero vector, just return - // the LHS. - return N1; + return getUNDEF(VT); // fold op(arg1, undef) -> undef + case ISD::MUL: + case ISD::AND: + return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0 case ISD::OR: - if (!VT.isVector()) - return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT); - // For vectors, we can't easily build an all one vector, just return - // the LHS. - return N1; - case ISD::SRA: - return N1; + return getAllOnesConstant(DL, VT); } } @@ -4739,10 +4885,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, - SDValue N1, SDValue N2, SDValue N3) { + SDValue N1, SDValue N2, SDValue N3, + const SDNodeFlags Flags) { // Perform various simplifications. switch (Opcode) { case ISD::FMA: { + assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); + assert(N1.getValueType() == VT && N2.getValueType() == VT && + N3.getValueType() == VT && "FMA types must match!"); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3); @@ -4833,10 +4983,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { + E->intersectFlagsWith(Flags); return SDValue(E, 0); + } N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + N->setFlags(Flags); createOperands(N, Ops); CSEMap.InsertNode(N, IP); } else { @@ -5107,6 +5260,31 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { return MF.getFunction().optForSize(); } +static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, + SmallVector<SDValue, 32> &OutChains, unsigned From, + unsigned To, SmallVector<SDValue, 16> &OutLoadChains, + SmallVector<SDValue, 16> &OutStoreChains) { + assert(OutLoadChains.size() && "Missing loads in memcpy inlining"); + assert(OutStoreChains.size() && "Missing stores in memcpy inlining"); + SmallVector<SDValue, 16> GluedLoadChains; + for (unsigned i = From; i < To; ++i) { + OutChains.push_back(OutLoadChains[i]); + GluedLoadChains.push_back(OutLoadChains[i]); + } + + // Chain for all loads. 
+ SDValue LoadToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + GluedLoadChains); + + for (unsigned i = From; i < To; ++i) { + StoreSDNode *ST = dyn_cast<StoreSDNode>(OutStoreChains[i]); + SDValue NewStore = DAG.getTruncStore(LoadToken, dl, ST->getValue(), + ST->getBasePtr(), ST->getMemoryVT(), + ST->getMemOperand()); + OutChains.push_back(NewStore); + } +} + static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, unsigned Align, @@ -5171,7 +5349,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, MachineMemOperand::Flags MMOFlags = isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; - SmallVector<SDValue, 8> OutChains; + SmallVector<SDValue, 16> OutLoadChains; + SmallVector<SDValue, 16> OutStoreChains; + SmallVector<SDValue, 32> OutChains; unsigned NumMemOps = MemOps.size(); uint64_t SrcOff = 0, DstOff = 0; for (unsigned i = 0; i != NumMemOps; ++i) { @@ -5205,11 +5385,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SubSlice.Length = VTSize; } Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice); - if (Value.getNode()) + if (Value.getNode()) { Store = DAG.getStore(Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags); + OutChains.push_back(Store); + } } if (!Store.getNode()) { @@ -5231,17 +5413,61 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, DAG.getMemBasePlusOffset(Src, SrcOff, dl), SrcPtrInfo.getWithOffset(SrcOff), VT, MinAlign(SrcAlign, SrcOff), SrcMMOFlags); - OutChains.push_back(Value.getValue(1)); + OutLoadChains.push_back(Value.getValue(1)); + Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags); + OutStoreChains.push_back(Store); } - OutChains.push_back(Store); SrcOff += VTSize; DstOff += VTSize; Size -= VTSize; } + unsigned GluedLdStLimit = MaxLdStGlue == 0 ? + TLI.getMaxGluedStoresPerMemcpy() : MaxLdStGlue; + unsigned NumLdStInMemcpy = OutStoreChains.size(); + + if (NumLdStInMemcpy) { + // It may be that memcpy might be converted to memset if it's memcpy + // of constants. In such a case, we won't have loads and stores, but + // just stores. In the absence of loads, there is nothing to gang up. + if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt) { + // If target does not care, just leave as it. + for (unsigned i = 0; i < NumLdStInMemcpy; ++i) { + OutChains.push_back(OutLoadChains[i]); + OutChains.push_back(OutStoreChains[i]); + } + } else { + // Ld/St less than/equal limit set by target. + if (NumLdStInMemcpy <= GluedLdStLimit) { + chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0, + NumLdStInMemcpy, OutLoadChains, + OutStoreChains); + } else { + unsigned NumberLdChain = NumLdStInMemcpy / GluedLdStLimit; + unsigned RemainingLdStInMemcpy = NumLdStInMemcpy % GluedLdStLimit; + unsigned GlueIter = 0; + + for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt) { + unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit; + unsigned IndexTo = NumLdStInMemcpy - GlueIter; + + chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, IndexFrom, IndexTo, + OutLoadChains, OutStoreChains); + GlueIter += GluedLdStLimit; + } + + // Residual ld/st. 
+ if (RemainingLdStInMemcpy) { + chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0, + RemainingLdStInMemcpy, OutLoadChains, + OutStoreChains); + } + } + } + } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } @@ -5334,7 +5560,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } -/// \brief Lower the call to 'memset' intrinsic function into a series of store +/// Lower the call to 'memset' intrinsic function into a series of store /// operations. /// /// \param DAG Selection DAG where lowered code is placed. @@ -5518,6 +5744,47 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, return CallResult.second; } +SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, + SDValue Dst, unsigned DstAlign, + SDValue Src, unsigned SrcAlign, + SDValue Size, Type *SizeTy, + unsigned ElemSz, bool isTailCall, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Ty = SizeTy; + Entry.Node = Size; + Args.push_back(Entry); + + RTLIB::Libcall LibraryCall = + RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(LibraryCall), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool isTailCall, @@ -5579,6 +5846,47 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, return CallResult.second; } +SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, + SDValue Dst, unsigned DstAlign, + SDValue Src, unsigned SrcAlign, + SDValue Size, Type *SizeTy, + unsigned ElemSz, bool isTailCall, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + // Emit a library call. 
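The glued-memcpy path above walks the load/store list from the back in blocks of GluedLdStLimit and hands whatever is left at the front to a final chainLoadsAndStoresForMemcpy call. A standalone sketch of just that index arithmetic, assuming 11 load/store pairs and a limit of 4 glued stores:

    #include <cstdio>

    int main() {
      // Assumed values: 11 load/store pairs, target limit of 4 glued stores.
      unsigned NumLdSt = 11, Limit = 4;
      unsigned NumChains = NumLdSt / Limit;  // 2 full blocks
      unsigned Remaining = NumLdSt % Limit;  // 3 left over
      unsigned GlueIter = 0;
      // Full blocks are taken from the end of the list, Limit at a time.
      for (unsigned cnt = 0; cnt < NumChains; ++cnt) {
        unsigned From = NumLdSt - GlueIter - Limit;
        unsigned To = NumLdSt - GlueIter;
        printf("block %u: [%u, %u)\n", cnt, From, To);
        GlueIter += Limit;
      }
      // The residue stays at the front of the list.
      if (Remaining)
        printf("residual: [0, %u)\n", Remaining);
      return 0;
    }

The ranges printed are [7, 11), [3, 7) and [0, 3): every load/store pair is covered exactly once, which is what keeps the emitted TokenFactor chains equivalent to the original one-store-per-load chaining.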
+ TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Ty = SizeTy; + Entry.Node = Size; + Args.push_back(Entry); + + RTLIB::Libcall LibraryCall = + RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(LibraryCall), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool isTailCall, @@ -5641,6 +5949,46 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, return CallResult.second; } +SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl, + SDValue Dst, unsigned DstAlign, + SDValue Value, SDValue Size, Type *SizeTy, + unsigned ElemSz, bool isTailCall, + MachinePointerInfo DstPtrInfo) { + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Ty = Type::getInt8Ty(*getContext()); + Entry.Node = Value; + Args.push_back(Entry); + + Entry.Ty = SizeTy; + Entry.Node = Size; + Args.push_back(Entry); + + RTLIB::Libcall LibraryCall = + RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(LibraryCall), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTList, ArrayRef<SDValue> Ops, MachineMemOperand *MMO) { @@ -5736,6 +6084,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, assert((Opcode == ISD::ATOMIC_LOAD_ADD || Opcode == ISD::ATOMIC_LOAD_SUB || Opcode == ISD::ATOMIC_LOAD_AND || + Opcode == ISD::ATOMIC_LOAD_CLR || Opcode == ISD::ATOMIC_LOAD_OR || Opcode == ISD::ATOMIC_LOAD_XOR || Opcode == ISD::ATOMIC_LOAD_NAND || @@ -6207,7 +6556,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO) { - assert(Ops.size() == 5 && "Incompatible number of operands"); + assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); @@ -6233,6 +6582,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, assert(N->getIndex().getValueType().getVectorNumElements() == N->getValueType(0).getVectorNumElements() && "Vector width mismatch between index and 
data"); + assert(isa<ConstantSDNode>(N->getScale()) && + cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + "Scale should be a constant power of 2"); CSEMap.InsertNode(N, IP); InsertNode(N); @@ -6244,7 +6596,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO) { - assert(Ops.size() == 5 && "Incompatible number of operands"); + assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); @@ -6267,6 +6619,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, assert(N->getIndex().getValueType().getVectorNumElements() == N->getValue().getValueType().getVectorNumElements() && "Vector width mismatch between index and data"); + assert(isa<ConstantSDNode>(N->getScale()) && + cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + "Scale should be a constant power of 2"); CSEMap.InsertNode(N, IP); InsertNode(N); @@ -6558,6 +6913,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { // Now we update the operands. N->OperandList[0].set(Op); + updateDivergence(N); // If this gets put into a CSE map, add it. if (InsertPos) CSEMap.InsertNode(N, InsertPos); return N; @@ -6586,6 +6942,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { if (N->OperandList[1] != Op2) N->OperandList[1].set(Op2); + updateDivergence(N); // If this gets put into a CSE map, add it. if (InsertPos) CSEMap.InsertNode(N, InsertPos); return N; @@ -6636,6 +6993,7 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { if (N->OperandList[i] != Ops[i]) N->OperandList[i].set(Ops[i]); + updateDivergence(N); // If this gets put into a CSE map, add it. if (InsertPos) CSEMap.InsertNode(N, InsertPos); return N; @@ -7061,11 +7419,24 @@ SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var, /// FrameIndex SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var, DIExpression *Expr, unsigned FI, + bool IsIndirect, const DebugLoc &DL, unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, DL, O); + return new (DbgInfo->getAlloc()) + SDDbgValue(Var, Expr, FI, IsIndirect, DL, O, SDDbgValue::FRAMEIX); +} + +/// VReg +SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var, + DIExpression *Expr, + unsigned VReg, bool IsIndirect, + const DebugLoc &DL, unsigned O) { + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) + SDDbgValue(Var, Expr, VReg, IsIndirect, DL, O, SDDbgValue::VREG); } void SelectionDAG::transferDbgValues(SDValue From, SDValue To, @@ -7155,8 +7526,9 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { DV->isIndirect(), DV->getDebugLoc(), DV->getOrder()); ClonedDVs.push_back(Clone); DV->setIsInvalidated(); - DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this); - dbgs() << " into " << *DIExpr << '\n'); + LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; + N0.getNode()->dumprFull(this); + dbgs() << " into " << *DIExpr << '\n'); } } } @@ -7165,6 +7537,14 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { AddDbgValue(Dbg, Dbg->getSDNode(), false); } +/// Creates a SDDbgLabel node. 
+SDDbgLabel *SelectionDAG::getDbgLabel(DILabel *Label, + const DebugLoc &DL, unsigned O) { + assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) SDDbgLabel(Label, DL, O); +} + namespace { /// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node @@ -7227,8 +7607,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { SDUse &Use = UI.getUse(); ++UI; Use.set(To); + if (To->isDivergent() != From->isDivergent()) + updateDivergence(User); } while (UI != UE && *UI == User); - // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User); @@ -7282,6 +7663,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { SDUse &Use = UI.getUse(); ++UI; Use.setNode(To); + if (To->isDivergent() != From->isDivergent()) + updateDivergence(User); } while (UI != UE && *UI == User); // Now that we have modified User, add it back to the CSE maps. If it @@ -7326,8 +7709,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { const SDValue &ToOp = To[Use.getResNo()]; ++UI; Use.set(ToOp); + if (To->getNode()->isDivergent() != From->isDivergent()) + updateDivergence(User); } while (UI != UE && *UI == User); - // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User); @@ -7385,8 +7769,9 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ ++UI; Use.set(To); + if (To->isDivergent() != From->isDivergent()) + updateDivergence(User); } while (UI != UE && *UI == User); - // We are iterating over all uses of the From node, so if a use // doesn't use the specific value, no changes are made. 
if (!UserRemovedFromCSEMaps) @@ -7419,6 +7804,72 @@ namespace { } // end anonymous namespace +void SelectionDAG::updateDivergence(SDNode * N) +{ + if (TLI->isSDNodeAlwaysUniform(N)) + return; + bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA); + for (auto &Op : N->ops()) { + if (Op.Val.getValueType() != MVT::Other) + IsDivergent |= Op.getNode()->isDivergent(); + } + if (N->SDNodeBits.IsDivergent != IsDivergent) { + N->SDNodeBits.IsDivergent = IsDivergent; + for (auto U : N->uses()) { + updateDivergence(U); + } + } +} + + +void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) { + DenseMap<SDNode *, unsigned> Degree; + Order.reserve(AllNodes.size()); + for (auto & N : allnodes()) { + unsigned NOps = N.getNumOperands(); + Degree[&N] = NOps; + if (0 == NOps) + Order.push_back(&N); + } + for (std::vector<SDNode *>::iterator I = Order.begin(); + I!=Order.end();++I) { + SDNode * N = *I; + for (auto U : N->uses()) { + unsigned &UnsortedOps = Degree[U]; + if (0 == --UnsortedOps) + Order.push_back(U); + } + } +} + +void SelectionDAG::VerifyDAGDiverence() +{ + std::vector<SDNode*> TopoOrder; + CreateTopologicalOrder(TopoOrder); + const TargetLowering &TLI = getTargetLoweringInfo(); + DenseMap<const SDNode *, bool> DivergenceMap; + for (auto &N : allnodes()) { + DivergenceMap[&N] = false; + } + for (auto N : TopoOrder) { + bool IsDivergent = DivergenceMap[N]; + bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA); + for (auto &Op : N->ops()) { + if (Op.Val.getValueType() != MVT::Other) + IsSDNodeDivergent |= DivergenceMap[Op.getNode()]; + } + if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) { + DivergenceMap[N] = true; + } + } + for (auto &N : allnodes()) { + (void)N; + assert(DivergenceMap[&N] == N.isDivergent() && + "Divergence bit inconsistency detected\n"); + } +} + + /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving /// uses of other values produced by From.getNode() alone. The same value /// may appear in both the From and To list. The Deleted vector is @@ -7450,7 +7901,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, } // Sort the uses, so that all the uses from a given User are together. - std::sort(Uses.begin(), Uses.end()); + llvm::sort(Uses.begin(), Uses.end()); for (unsigned UseIndex = 0, UseIndexEnd = Uses.size(); UseIndex != UseIndexEnd; ) { @@ -7579,6 +8030,10 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { DbgInfo->add(DB, SD, isParameter); } +void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) { + DbgInfo->add(DB); +} + SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp) { assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); @@ -7963,8 +8418,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { const GlobalValue *GV; int64_t GVOffset = 0; if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); - KnownBits Known(PtrWidth); + unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType()); + KnownBits Known(IdxWidth); llvm::computeKnownBits(GV, Known, getDataLayout()); unsigned AlignBits = Known.countMinTrailingZeros(); unsigned Align = AlignBits ? 
1 << std::min(31U, AlignBits) : 0; @@ -8198,7 +8653,7 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { return true; } -// \brief Returns the SDNode if it is a constant integer BuildVector +// Returns the SDNode if it is a constant integer BuildVector // or constant integer. SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) { if (isa<ConstantSDNode>(N)) @@ -8224,6 +8679,26 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) { return nullptr; } +void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { + assert(!Node->OperandList && "Node already has operands"); + SDUse *Ops = OperandRecycler.allocate( + ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator); + + bool IsDivergent = false; + for (unsigned I = 0; I != Vals.size(); ++I) { + Ops[I].setUser(Node); + Ops[I].setInitial(Vals[I]); + if (Ops[I].Val.getValueType() != MVT::Other) // Skip Chain. It does not carry divergence. + IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent(); + } + Node->NumOperands = Vals.size(); + Node->OperandList = Ops; + IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA); + if (!TLI->isSDNodeAlwaysUniform(Node)) + Node->SDNodeBits.IsDivergent = IsDivergent; + checkForCycles(Node); +} + #ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, SmallPtrSetImpl<const SDNode*> &Visited, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index da1574f60524..c859f16e74fe 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -99,16 +99,43 @@ BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N, } // Consume constant adds & ors with appropriate masking. - while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) { - if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) { + while (true) { + switch (Base->getOpcode()) { + case ISD::OR: // Only consider ORs which act as adds. - if (Base->getOpcode() == ISD::OR && - !DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) - break; - Offset += C->getSExtValue(); - Base = Base->getOperand(0); - continue; + if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) + if (DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) { + Offset += C->getSExtValue(); + Base = Base->getOperand(0); + continue; + } + break; + case ISD::ADD: + if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) { + Offset += C->getSExtValue(); + Base = Base->getOperand(0); + continue; + } + break; + case ISD::LOAD: + case ISD::STORE: { + auto *LSBase = cast<LSBaseSDNode>(Base.getNode()); + unsigned int IndexResNo = (Base->getOpcode() == ISD::LOAD) ? 1 : 0; + if (LSBase->isIndexed() && Base.getResNo() == IndexResNo) + if (auto *C = dyn_cast<ConstantSDNode>(LSBase->getOffset())) { + auto Off = C->getSExtValue(); + if (LSBase->getAddressingMode() == ISD::PRE_DEC || + LSBase->getAddressingMode() == ISD::POST_DEC) + Offset -= Off; + else + Offset += Off; + Base = LSBase->getBasePtr(); + continue; + } + break; + } } + // If we get here break out of the loop. 
break; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 68bbd62e1321..1aa8df29af3b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "SelectionDAGBuilder.h" +#include "SDNodeDbgValue.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" @@ -49,7 +50,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -102,6 +102,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -777,8 +778,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, EVT ValueVT = ValueVTs[Value]; unsigned NumRegs = RegCount[Value]; MVT RegisterVT = IsABIMangled - ? TLI.getRegisterTypeForCallingConv(RegVTs[Value]) - : RegVTs[Value]; + ? TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value]) + : RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { @@ -818,32 +819,15 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // FIXME: We capture more information than the dag can represent. For // now, just use the tightest assertzext/assertsext possible. - bool isSExt = true; + bool isSExt; EVT FromVT(MVT::Other); - if (NumSignBits == RegSize) { - isSExt = true; // ASSERT SEXT 1 - FromVT = MVT::i1; - } else if (NumZeroBits >= RegSize - 1) { - isSExt = false; // ASSERT ZEXT 1 - FromVT = MVT::i1; - } else if (NumSignBits > RegSize - 8) { - isSExt = true; // ASSERT SEXT 8 - FromVT = MVT::i8; - } else if (NumZeroBits >= RegSize - 8) { - isSExt = false; // ASSERT ZEXT 8 - FromVT = MVT::i8; - } else if (NumSignBits > RegSize - 16) { - isSExt = true; // ASSERT SEXT 16 - FromVT = MVT::i16; - } else if (NumZeroBits >= RegSize - 16) { - isSExt = false; // ASSERT ZEXT 16 - FromVT = MVT::i16; - } else if (NumSignBits > RegSize - 32) { - isSExt = true; // ASSERT SEXT 32 - FromVT = MVT::i32; - } else if (NumZeroBits >= RegSize - 32) { - isSExt = false; // ASSERT ZEXT 32 - FromVT = MVT::i32; + if (NumZeroBits) { + FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits); + isSExt = false; + } else if (NumSignBits > 1) { + FromVT = + EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1); + isSExt = true; } else { continue; } @@ -876,8 +860,8 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, unsigned NumParts = RegCount[Value]; MVT RegisterVT = IsABIMangled - ? TLI.getRegisterTypeForCallingConv(RegVTs[Value]) - : RegVTs[Value]; + ? 
TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value]) + : RegVTs[Value]; if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) ExtendKind = ISD::ZERO_EXTEND; @@ -970,6 +954,20 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, } } +SmallVector<std::pair<unsigned, unsigned>, 4> +RegsForValue::getRegsAndSizes() const { + SmallVector<std::pair<unsigned, unsigned>, 4> OutVec; + unsigned I = 0; + for (auto CountAndVT : zip_first(RegCount, RegVTs)) { + unsigned RegCount = std::get<0>(CountAndVT); + MVT RegisterVT = std::get<1>(CountAndVT); + unsigned RegisterSize = RegisterVT.getSizeInBits(); + for (unsigned E = I + RegCount; I != E; ++I) + OutVec.push_back(std::make_pair(Regs[I], RegisterSize)); + } + return OutVec; +} + void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa, const TargetLibraryInfo *li) { AA = aa; @@ -1054,6 +1052,22 @@ void SelectionDAGBuilder::visit(const Instruction &I) { visit(I.getOpcode(), I); + if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) { + // Propagate the fast-math-flags of this IR instruction to the DAG node that + // maps to this instruction. + // TODO: We could handle all flags (nsw, etc) here. + // TODO: If an IR instruction maps to >1 node, only the final node will have + // flags set. + if (SDNode *Node = getNodeForIRValue(&I)) { + SDNodeFlags IncomingFlags; + IncomingFlags.copyFMF(*FPMO); + if (!Node->getFlags().isDefined()) + Node->setFlags(IncomingFlags); + else + Node->intersectFlagsWith(IncomingFlags); + } + } + if (!isa<TerminatorInst>(&I) && !HasTailCall && !isStatepoint(&I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); @@ -1077,14 +1091,39 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { } } +void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable, + const DIExpression *Expr) { + auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) { + const DbgValueInst *DI = DDI.getDI(); + DIVariable *DanglingVariable = DI->getVariable(); + DIExpression *DanglingExpr = DI->getExpression(); + if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) { + LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n"); + return true; + } + return false; + }; + + for (auto &DDIMI : DanglingDebugInfoMap) { + DanglingDebugInfoVector &DDIV = DDIMI.second; + DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end()); + } +} + // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, // generate the debug data structures now that we've seen its definition. 
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, SDValue Val) { - DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; - if (DDI.getDI()) { + auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V); + if (DanglingDbgInfoIt == DanglingDebugInfoMap.end()) + return; + + DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second; + for (auto &DDI : DDIV) { const DbgValueInst *DI = DDI.getDI(); + assert(DI && "Ill-formed DanglingDebugInfo"); DebugLoc dl = DDI.getdl(); + unsigned ValSDNodeOrder = Val.getNode()->getIROrder(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); DILocalVariable *Variable = DI->getVariable(); DIExpression *Expr = DI->getExpression(); @@ -1093,13 +1132,26 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, SDDbgValue *SDV; if (Val.getNode()) { if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) { - SDV = getDbgValue(Val, Variable, Expr, dl, DbgSDNodeOrder); + LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order=" + << DbgSDNodeOrder << "] for:\n " << *DI << "\n"); + LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump()); + // Increase the SDNodeOrder for the DbgValue here to make sure it is + // inserted after the definition of Val when emitting the instructions + // after ISel. An alternative could be to teach + // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly. + LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs() + << "changing SDNodeOrder from " << DbgSDNodeOrder << " to " + << ValSDNodeOrder << "\n"); + SDV = getDbgValue(Val, Variable, Expr, dl, + std::max(DbgSDNodeOrder, ValSDNodeOrder)); DAG.AddDbgValue(SDV, Val.getNode(), false); - } + } else + LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI + << "in EmitFuncArgumentDbgValue\n"); } else - DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); - DanglingDebugInfoMap[V] = DanglingDebugInfo(); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); } + DDIV.clear(); } /// getCopyFromRegs - If there was virtual register allocated for the value V @@ -1315,12 +1367,18 @@ void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX; bool IsCoreCLR = Pers == EHPersonality::CoreCLR; + bool IsSEH = isAsynchronousEHPersonality(Pers); + bool IsWasmCXX = Pers == EHPersonality::Wasm_CXX; MachineBasicBlock *CatchPadMBB = FuncInfo.MBB; + if (!IsSEH) + CatchPadMBB->setIsEHScopeEntry(); // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues. if (IsMSVCCXX || IsCoreCLR) CatchPadMBB->setIsEHFuncletEntry(); - - DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot())); + // Wasm does not need catchpads anymore + if (!IsWasmCXX) + DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, + getControlRoot())); } void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { @@ -1363,7 +1421,8 @@ void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { // Don't emit any special code for the cleanuppad instruction. It just marks - // the start of a funclet. + // the start of an EH scope/funclet. 
+ FuncInfo.MBB->setIsEHScopeEntry(); FuncInfo.MBB->setIsEHFuncletEntry(); FuncInfo.MBB->setIsCleanupFuncletEntry(); } @@ -1385,6 +1444,7 @@ static void findUnwindDestinations( classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; bool IsCoreCLR = Personality == EHPersonality::CoreCLR; + bool IsSEH = isAsynchronousEHPersonality(Personality); while (EHPadBB) { const Instruction *Pad = EHPadBB->getFirstNonPHI(); @@ -1397,6 +1457,7 @@ static void findUnwindDestinations( // Stop on cleanup pads. Cleanups are always funclet entries for all known // personalities. UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); + UnwindDests.back().first->setIsEHScopeEntry(); UnwindDests.back().first->setIsEHFuncletEntry(); break; } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { @@ -1406,6 +1467,8 @@ static void findUnwindDestinations( // For MSVC++ and the CLR, catchblocks are funclets and need prologues. if (IsMSVCCXX || IsCoreCLR) UnwindDests.back().first->setIsEHFuncletEntry(); + if (!IsSEH) + UnwindDests.back().first->setIsEHScopeEntry(); } NewEHPadBB = CatchSwitch->getUnwindDest(); } else { @@ -1653,8 +1716,7 @@ SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src, if (!BPI) { // If BPI is not available, set the default probability as 1 / N, where N is // the number of successors. - auto SuccSize = std::max<uint32_t>( - std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1); + auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1); return BranchProbability(1, SuccSize); } return BPI->getEdgeProbability(SrcBB, DstBB); @@ -2489,8 +2551,8 @@ void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { assert(CC.Low == CC.High && "Input clusters must be single-case"); #endif - std::sort(Clusters.begin(), Clusters.end(), - [](const CaseCluster &a, const CaseCluster &b) { + llvm::sort(Clusters.begin(), Clusters.end(), + [](const CaseCluster &a, const CaseCluster &b) { return a.Low->getValue().slt(b.Low->getValue()); }); @@ -2551,9 +2613,23 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { } void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { - if (DAG.getTarget().Options.TrapUnreachable) - DAG.setRoot( - DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); + if (!DAG.getTarget().Options.TrapUnreachable) + return; + + // We may be able to ignore unreachable behind a noreturn call. + if (DAG.getTarget().Options.NoTrapAfterNoreturn) { + const BasicBlock &BB = *I.getParent(); + if (&I != &BB.front()) { + BasicBlock::const_iterator PredI = + std::prev(BasicBlock::const_iterator(&I)); + if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) { + if (Call->doesNotReturn()) + return; + } + } + } + + DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); } void SelectionDAGBuilder::visitFSub(const User &I) { @@ -2597,6 +2673,10 @@ static bool isVectorReductionOp(const User *I) { } unsigned ElemNum = Inst->getType()->getVectorNumElements(); + // Ensure the reduction size is a power of 2. + if (!isPowerOf2_32(ElemNum)) + return false; + unsigned ElemNumToReduce = ElemNum; // Do DFS search on the def-use chain from the given instruction. 
We only @@ -2682,7 +2762,7 @@ static bool isVectorReductionOp(const User *I) { return false; const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1)); - if (!Val || Val->getZExtValue() != 0) + if (!Val || !Val->isZero()) return false; ReduxExtracted = true; @@ -2693,45 +2773,23 @@ static bool isVectorReductionOp(const User *I) { return ReduxExtracted; } -void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { - SDValue Op1 = getValue(I.getOperand(0)); - SDValue Op2 = getValue(I.getOperand(1)); - - bool nuw = false; - bool nsw = false; - bool exact = false; - bool vec_redux = false; - FastMathFlags FMF; - - if (const OverflowingBinaryOperator *OFBinOp = - dyn_cast<const OverflowingBinaryOperator>(&I)) { - nuw = OFBinOp->hasNoUnsignedWrap(); - nsw = OFBinOp->hasNoSignedWrap(); +void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { + SDNodeFlags Flags; + if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) { + Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap()); + Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap()); + } + if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) { + Flags.setExact(ExactOp->isExact()); } - if (const PossiblyExactOperator *ExactOp = - dyn_cast<const PossiblyExactOperator>(&I)) - exact = ExactOp->isExact(); - if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I)) - FMF = FPOp->getFastMathFlags(); - if (isVectorReductionOp(&I)) { - vec_redux = true; - DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n"); + Flags.setVectorReduction(true); + LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n"); } - SDNodeFlags Flags; - Flags.setExact(exact); - Flags.setNoSignedWrap(nsw); - Flags.setNoUnsignedWrap(nuw); - Flags.setVectorReduction(vec_redux); - Flags.setAllowReciprocal(FMF.allowReciprocal()); - Flags.setAllowContract(FMF.allowContract()); - Flags.setNoInfs(FMF.noInfs()); - Flags.setNoNaNs(FMF.noNaNs()); - Flags.setNoSignedZeros(FMF.noSignedZeros()); - Flags.setUnsafeAlgebra(FMF.isFast()); - - SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, Flags); setValue(&I, BinNodeValue); } @@ -2823,13 +2881,12 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { predicate = FCmpInst::Predicate(FC->getPredicate()); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - ISD::CondCode Condition = getFCmpCondCode(predicate); - // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them. - // FIXME: We should propagate the fast-math-flags to the DAG node itself for - // further optimization, but currently FMF is only applicable to binary nodes. 
- if (TM.Options.NoNaNsFPMath) + ISD::CondCode Condition = getFCmpCondCode(predicate); + auto *FPMO = dyn_cast<FPMathOperator>(&I); + if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); @@ -3424,10 +3481,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { DAG.getConstant(Offset, dl, N.getValueType()), Flags); } } else { - MVT PtrTy = - DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS); - unsigned PtrSize = PtrTy.getSizeInBits(); - APInt ElementSize(PtrSize, DL->getTypeAllocSize(GTI.getIndexedType())); + unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS); + MVT IdxTy = MVT::getIntegerVT(IdxSize); + APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType())); // If this is a scalar constant or a splat vector of constants, // handle it quickly. @@ -3439,11 +3495,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (CI) { if (CI->isZero()) continue; - APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); + APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize); LLVMContext &Context = *DAG.getContext(); SDValue OffsVal = VectorWidth ? - DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, PtrTy, VectorWidth)) : - DAG.getConstant(Offs, dl, PtrTy); + DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) : + DAG.getConstant(Offs, dl, IdxTy); // In an inbouds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. @@ -3867,7 +3923,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // extract the splat value and use it as a uniform base. // In all other cases the function returns 'false'. static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, - SelectionDAGBuilder* SDB) { + SDValue &Scale, SelectionDAGBuilder* SDB) { SelectionDAG& DAG = SDB->DAG; LLVMContext &Context = *DAG.getContext(); @@ -3897,6 +3953,10 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal)) return false; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()), + SDB->getCurSDLoc(), TLI.getPointerTy(DL)); Base = SDB->getValue(Ptr); Index = SDB->getValue(IndexVal); @@ -3926,8 +3986,9 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Base; SDValue Index; + SDValue Scale; const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction(). 
@@ -3935,10 +3996,11 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { MachineMemOperand::MOStore, VT.getStoreSize(), Alignment, AAInfo); if (!UniformBase) { - Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); + Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); + Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } - SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index }; + SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, Ops, MMO); DAG.setRoot(Scatter); @@ -3997,10 +4059,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, ISD::NON_EXTLOAD, IsExpanding); - if (AddToChain) { - SDValue OutChain = Load.getValue(1); - DAG.setRoot(OutChain); - } + if (AddToChain) + PendingLoads.push_back(Load.getValue(1)); setValue(&I, Load); } @@ -4025,8 +4085,9 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Root = DAG.getRoot(); SDValue Base; SDValue Index; + SDValue Scale; const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); bool ConstantMemory = false; if (UniformBase && AA && AA->pointsToConstantMemory(MemoryLocation( @@ -4044,10 +4105,11 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { Alignment, AAInfo, Ranges); if (!UniformBase) { - Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); + Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); + Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } - SDValue Ops[] = { Root, Src0, Mask, Base, Index }; + SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale }; SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, Ops, MMO); @@ -4868,26 +4930,18 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( const auto &TLI = DAG.getTargetLoweringInfo(); RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, V->getType(), isABIRegCopy(V)); - unsigned NumRegs = - std::accumulate(RFV.RegCount.begin(), RFV.RegCount.end(), 0); - if (NumRegs > 1) { - unsigned I = 0; + if (RFV.occupiesMultipleRegs()) { unsigned Offset = 0; - auto RegisterVT = RFV.RegVTs.begin(); - for (auto RegCount : RFV.RegCount) { - unsigned RegisterSize = (RegisterVT++)->getSizeInBits(); - for (unsigned E = I + RegCount; I != E; ++I) { - // The vregs are guaranteed to be allocated in sequence. 
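The multi-register debug-value path above now pulls its register/size pairs from RegsForValue::getRegsAndSizes() and emits one fragment expression per register at an increasing bit offset. A small sketch of that offset bookkeeping, with an assumed 32+32+64-bit split and made-up virtual register numbers:

    #include <cstdio>
    #include <utility>
    #include <vector>

    int main() {
      // {vreg, size-in-bits}; the breakdown and vreg numbers are assumptions.
      std::vector<std::pair<unsigned, unsigned>> RegsAndSizes = {
          {100, 32}, {101, 32}, {102, 64}};
      unsigned Offset = 0;
      for (auto &RS : RegsAndSizes) {
        // Each register describes the fragment [Offset, Offset + size) of the
        // original variable, so successive DBG_VALUEs get successive offsets.
        printf("vreg%u -> fragment at bit %u, size %u\n", RS.first, Offset,
               RS.second);
        Offset += RS.second;
      }
      return 0;
    }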
- Op = MachineOperand::CreateReg(VMI->second + I, false); - auto FragmentExpr = DIExpression::createFragmentExpression( - Expr, Offset, RegisterSize); - if (!FragmentExpr) - continue; - FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, - Op->getReg(), Variable, *FragmentExpr)); - Offset += RegisterSize; - } + for (auto RegAndSize : RFV.getRegsAndSizes()) { + Op = MachineOperand::CreateReg(RegAndSize.first, false); + auto FragmentExpr = DIExpression::createFragmentExpression( + Expr, Offset, RegAndSize.second); + if (!FragmentExpr) + continue; + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, + Op->getReg(), Variable, *FragmentExpr)); + Offset += RegAndSize.second; } return true; } @@ -4901,17 +4955,10 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - if (Op->isReg()) - FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, - Op->getReg(), Variable, Expr)); - else - FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)) - .add(*Op) - .addImm(0) - .addMetadata(Variable) - .addMetadata(Expr)); + IsIndirect = (Op->isReg()) ? IsIndirect : true; + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + *Op, Variable, Expr)); return true; } @@ -4924,13 +4971,20 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, unsigned DbgSDNodeOrder) { if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) { // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe - // stack slot locations as such instead of as indirectly addressed - // locations. - return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl, - DbgSDNodeOrder); + // stack slot locations. + // + // Consider "int x = 0; int *px = &x;". There are two kinds of interesting + // debug values here after optimization: + // + // dbg.value(i32* %px, !"int *px", !DIExpression()), and + // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref)) + // + // Both describe the direct values of their associated variables. + return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), + /*IsIndirect*/ false, dl, DbgSDNodeOrder); } - return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl, - DbgSDNodeOrder); + return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), + /*IsIndirect*/ false, dl, DbgSDNodeOrder); } // VisualStudio defines setjmp as _setjmp @@ -5000,14 +5054,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::longjmp: return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { + const auto &MCI = cast<MemCpyInst>(I); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); - unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); - if (!Align) - Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. - bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); + // @llvm.memcpy defines 0 and 1 to both mean no alignment. 
+ unsigned DstAlign = std::max<unsigned>(MCI.getDestAlignment(), 1); + unsigned SrcAlign = std::max<unsigned>(MCI.getSourceAlignment(), 1); + unsigned Align = MinAlign(DstAlign, SrcAlign); + bool isVol = MCI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + // FIXME: Support passing different dest/src alignments to the memcpy DAG + // node. SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, isTC, MachinePointerInfo(I.getArgOperand(0)), @@ -5016,13 +5074,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memset: { + const auto &MSI = cast<MemSetInst>(I); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); - unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); - if (!Align) - Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. - bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); + // @llvm.memset defines 0 and 1 to both mean no alignment. + unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1); + bool isVol = MSI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0))); @@ -5030,14 +5088,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memmove: { + const auto &MMI = cast<MemMoveInst>(I); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); - unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); - if (!Align) - Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. - bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); + // @llvm.memmove defines 0 and 1 to both mean no alignment. + unsigned DstAlign = std::max<unsigned>(MMI.getDestAlignment(), 1); + unsigned SrcAlign = std::max<unsigned>(MMI.getSourceAlignment(), 1); + unsigned Align = MinAlign(DstAlign, SrcAlign); + bool isVol = MMI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + // FIXME: Support passing different dest/src alignments to the memmove DAG + // node. SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); @@ -5050,36 +5112,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Src = getValue(MI.getRawSource()); SDValue Length = getValue(MI.getLength()); - // Emit a library call. 
- TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); - Entry.Node = Dst; - Args.push_back(Entry); - - Entry.Node = Src; - Args.push_back(Entry); - - Entry.Ty = MI.getLength()->getType(); - Entry.Node = Length; - Args.push_back(Entry); - - uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); - RTLIB::Libcall LibraryCall = - RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); - if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) - report_fatal_error("Unsupported element size"); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( - TLI.getLibcallCallingConv(LibraryCall), - Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), - TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args)); - - std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); - DAG.setRoot(CallResult.second); + unsigned DstAlign = MI.getDestAlignment(); + unsigned SrcAlign = MI.getSourceAlignment(); + Type *LengthTy = MI.getLength()->getType(); + unsigned ElemSz = MI.getElementSizeInBytes(); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src, + SrcAlign, Length, LengthTy, ElemSz, isTC, + MachinePointerInfo(MI.getRawDest()), + MachinePointerInfo(MI.getRawSource())); + updateDAGForMaybeTailCall(MC); return nullptr; } case Intrinsic::memmove_element_unordered_atomic: { @@ -5088,36 +5130,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Src = getValue(MI.getRawSource()); SDValue Length = getValue(MI.getLength()); - // Emit a library call. - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); - Entry.Node = Dst; - Args.push_back(Entry); - - Entry.Node = Src; - Args.push_back(Entry); - - Entry.Ty = MI.getLength()->getType(); - Entry.Node = Length; - Args.push_back(Entry); - - uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); - RTLIB::Libcall LibraryCall = - RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); - if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) - report_fatal_error("Unsupported element size"); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( - TLI.getLibcallCallingConv(LibraryCall), - Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), - TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args)); - - std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); - DAG.setRoot(CallResult.second); + unsigned DstAlign = MI.getDestAlignment(); + unsigned SrcAlign = MI.getSourceAlignment(); + Type *LengthTy = MI.getLength()->getType(); + unsigned ElemSz = MI.getElementSizeInBytes(); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src, + SrcAlign, Length, LengthTy, ElemSz, isTC, + MachinePointerInfo(MI.getRawDest()), + MachinePointerInfo(MI.getRawSource())); + updateDAGForMaybeTailCall(MC); return nullptr; } case Intrinsic::memset_element_unordered_atomic: { @@ -5126,37 +5148,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Val = getValue(MI.getValue()); SDValue Length = getValue(MI.getLength()); - // Emit a library call. 
- TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); - Entry.Node = Dst; - Args.push_back(Entry); - - Entry.Ty = Type::getInt8Ty(*DAG.getContext()); - Entry.Node = Val; - Args.push_back(Entry); - - Entry.Ty = MI.getLength()->getType(); - Entry.Node = Length; - Args.push_back(Entry); - - uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); - RTLIB::Libcall LibraryCall = - RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); - if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) - report_fatal_error("Unsupported element size"); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( - TLI.getLibcallCallingConv(LibraryCall), - Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), - TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args)); - - std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); - DAG.setRoot(CallResult.second); + unsigned DstAlign = MI.getDestAlignment(); + Type *LengthTy = MI.getLength()->getType(); + unsigned ElemSz = MI.getElementSizeInBytes(); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length, + LengthTy, ElemSz, isTC, + MachinePointerInfo(MI.getRawDest())); + updateDAGForMaybeTailCall(MC); return nullptr; } case Intrinsic::dbg_addr: @@ -5164,13 +5163,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const DbgInfoIntrinsic &DI = cast<DbgInfoIntrinsic>(I); DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); + dropDanglingDebugInfo(Variable, Expression); assert(Variable && "Missing variable"); // Check if address has undef value. const Value *Address = DI.getVariableLocation(); if (!Address || isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return nullptr; } @@ -5195,10 +5195,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in // the MachineFunction variable table. if (FI != std::numeric_limits<int>::max()) { - if (Intrinsic == Intrinsic::dbg_addr) - DAG.AddDbgValue(DAG.getFrameIndexDbgValue(Variable, Expression, FI, dl, - SDNodeOrder), - getRoot().getNode(), isParameter); + if (Intrinsic == Intrinsic::dbg_addr) { + SDDbgValue *SDV = DAG.getFrameIndexDbgValue( + Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter); + } return nullptr; } @@ -5214,8 +5215,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); if (isParameter && FINode) { // Byval parameter. We have a frame index at this point. - SDV = DAG.getFrameIndexDbgValue(Variable, Expression, - FINode->getIndex(), dl, SDNodeOrder); + SDV = + DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(), + /*IsIndirect*/ true, dl, SDNodeOrder); } else if (isa<Argument>(Address)) { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. 
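Stepping back to the memcpy and memmove hunks above: both now fold the separate dest/src alignments into a single value with MinAlign, and the FIXME notes that the DAG nodes cannot yet carry the two alignments separately. For power-of-two alignments that fold is simply "take the smaller one"; a minimal standalone sketch of the computation (illustrative name, not LLVM's helper):

#include <cassert>
#include <cstdint>

// For power-of-two alignments, isolating the lowest set bit of (A | B)
// yields the smaller of the two -- the single alignment handed to the
// memcpy/memmove DAG nodes above. "minAlign" is an illustrative stand-in.
static uint64_t minAlign(uint64_t A, uint64_t B) {
  uint64_t Bits = A | B;
  return Bits & (~Bits + 1); // lowest set bit
}

int main() {
  assert(minAlign(16, 4) == 4);  // dest align 16, src align 4 -> use 4
  assert(minAlign(1, 8) == 1);   // "no alignment" (0/1 clamped to 1) wins
  assert(minAlign(32, 32) == 32);
  return 0;
}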
@@ -5231,17 +5233,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // virtual register info from the FuncInfo.ValueMap. if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N)) { - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } return nullptr; } + case Intrinsic::dbg_label: { + const DbgLabelInst &DI = cast<DbgLabelInst>(I); + DILabel *Label = DI.getLabel(); + assert(Label && "Missing label"); + + SDDbgLabel *SDV; + SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder); + DAG.AddDbgLabel(SDV); + return nullptr; + } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast<DbgValueInst>(I); assert(DI.getVariable() && "Missing variable"); DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); + dropDanglingDebugInfo(Variable, Expression); const Value *V = DI.getValue(); if (!V) return nullptr; @@ -5266,16 +5279,64 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } + // PHI nodes have already been selected, so we should know which VReg that + // is assigns to already. + if (isa<PHINode>(V)) { + auto VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) { + unsigned Reg = VMI->second; + // The PHI node may be split up into several MI PHI nodes (in + // FunctionLoweringInfo::set). + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, + V->getType(), false); + if (RFV.occupiesMultipleRegs()) { + unsigned Offset = 0; + unsigned BitsToDescribe = 0; + if (auto VarSize = Variable->getSizeInBits()) + BitsToDescribe = *VarSize; + if (auto Fragment = Expression->getFragmentInfo()) + BitsToDescribe = Fragment->SizeInBits; + for (auto RegAndSize : RFV.getRegsAndSizes()) { + unsigned RegisterSize = RegAndSize.second; + // Bail out if all bits are described already. + if (Offset >= BitsToDescribe) + break; + unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe) + ? BitsToDescribe - Offset + : RegisterSize; + auto FragmentExpr = DIExpression::createFragmentExpression( + Expression, Offset, FragmentSize); + if (!FragmentExpr) + continue; + SDV = DAG.getVRegDbgValue(Variable, *FragmentExpr, RegAndSize.first, + false, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, nullptr, false); + Offset += RegisterSize; + } + } else { + SDV = DAG.getVRegDbgValue(Variable, Expression, Reg, false, dl, + SDNodeOrder); + DAG.AddDbgValue(SDV, nullptr, false); + } + return nullptr; + } + } + + // TODO: When we get here we will either drop the dbg.value completely, or + // we try to move it forward by letting it dangle for awhile. So we should + // probably add an extra DbgValue to the DAG here, with a reference to + // "noreg", to indicate that we have lost the debug location for the + // variable. + if (!V->use_empty() ) { // Do not call getValue(V) yet, as we don't want to generate code. // Remember it for later. 
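The per-register loop in the PHI-node case above clamps the last fragment so it never describes more bits than the variable (or the expression's existing fragment) provides, while still advancing the offset by the full register size. A standalone model of that arithmetic, with illustrative names only:

#include <cstdio>
#include <utility>
#include <vector>

// Standalone model of the per-register fragment arithmetic (illustrative
// names; the real loop feeds DIExpression::createFragmentExpression).
static std::vector<std::pair<unsigned, unsigned>>
fragmentsFor(const std::vector<unsigned> &RegSizes, unsigned BitsToDescribe) {
  std::vector<std::pair<unsigned, unsigned>> Frags; // (offset, size) in bits
  unsigned Offset = 0;
  for (unsigned RegisterSize : RegSizes) {
    if (Offset >= BitsToDescribe)
      break; // every interesting bit is described already
    unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
                                ? BitsToDescribe - Offset
                                : RegisterSize;
    Frags.push_back({Offset, FragmentSize});
    Offset += RegisterSize; // advance by the register, not the fragment
  }
  return Frags;
}

int main() {
  // An 80-bit fragment of a value living in three 32-bit registers:
  for (const auto &F : fragmentsFor({32, 32, 32}, 80))
    std::printf("fragment: offset %u, size %u\n", F.first, F.second);
  // -> (0,32), (32,32), (64,16)
  return 0;
}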
- DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); - DanglingDebugInfoMap[V] = DDI; + DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder); return nullptr; } - DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); - DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); + LLVM_DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); return nullptr; } @@ -5609,6 +5670,52 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); return nullptr; } + case Intrinsic::fshl: + case Intrinsic::fshr: { + bool IsFSHL = Intrinsic == Intrinsic::fshl; + SDValue X = getValue(I.getArgOperand(0)); + SDValue Y = getValue(I.getArgOperand(1)); + SDValue Z = getValue(I.getArgOperand(2)); + EVT VT = X.getValueType(); + + // When X == Y, this is rotate. Create the node directly if legal. + // TODO: This should also be done if the operation is custom, but we have + // to make sure targets are handling the modulo shift amount as expected. + // TODO: If the rotate direction (left or right) corresponding to the shift + // is not available, adjust the shift value and invert the direction. + auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR; + if (X == Y && TLI.isOperationLegal(RotateOpcode, VT)) { + setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z)); + return nullptr; + } + + // Get the shift amount and inverse shift amount, modulo the bit-width. + SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT); + SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC); + SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, Z); + SDValue InvShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC); + + // fshl: (X << (Z % BW)) | (Y >> ((BW - Z) % BW)) + // fshr: (X << ((BW - Z) % BW)) | (Y >> (Z % BW)) + SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt); + SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt); + SDValue Res = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY); + + // If (Z % BW == 0), then (BW - Z) % BW is also zero, so the result would + // be X | Y. If X == Y (rotate), that's fine. If not, we have to select. + if (X != Y) { + SDValue Zero = DAG.getConstant(0, sdl, VT); + EVT CCVT = MVT::i1; + if (VT.isVector()) + CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements()); + // For fshl, 0 shift returns the 1st arg (X). + // For fshr, 0 shift returns the 2nd arg (Y). + SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ); + Res = DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Res); + } + setValue(&I, Res); + return nullptr; + } case Intrinsic::stacksave: { SDValue Op = getRoot(); Res = DAG.getNode( @@ -5703,7 +5810,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::annotation: case Intrinsic::ptr_annotation: - case Intrinsic::invariant_group_barrier: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); return nullptr; @@ -5822,17 +5930,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[5]; unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); auto Flags = rw == 0 ? 
MachineMemOperand::MOLoad :MachineMemOperand::MOStore; - Ops[0] = getRoot(); + Ops[0] = DAG.getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); Ops[4] = getValue(I.getArgOperand(3)); - DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, - DAG.getVTList(MVT::Other), Ops, - EVT::getIntegerVT(*Context, 8), - MachinePointerInfo(I.getArgOperand(0)), - 0, /* align */ - Flags)); + SDValue Result = DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, + DAG.getVTList(MVT::Other), Ops, + EVT::getIntegerVT(*Context, 8), + MachinePointerInfo(I.getArgOperand(0)), + 0, /* align */ + Flags); + + // Chain the prefetch in parallell with any pending loads, to stay out of + // the way of later optimizations. + PendingLoads.push_back(Result); + Result = getRoot(); + DAG.setRoot(Result); return nullptr; } case Intrinsic::lifetime_start: @@ -6004,6 +6118,41 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, patchableNode); return nullptr; } + case Intrinsic::xray_typedevent: { + // Here we want to make sure that the intrinsic behaves as if it has a + // specific calling convention, and only for x86_64. + // FIXME: Support other platforms later. + const auto &Triple = DAG.getTarget().getTargetTriple(); + if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) + return nullptr; + + SDLoc DL = getCurSDLoc(); + SmallVector<SDValue, 8> Ops; + + // We want to say that we always want the arguments in registers. + // It's unclear to me how manipulating the selection DAG here forces callers + // to provide arguments in registers instead of on the stack. + SDValue LogTypeId = getValue(I.getArgOperand(0)); + SDValue LogEntryVal = getValue(I.getArgOperand(1)); + SDValue StrSizeVal = getValue(I.getArgOperand(2)); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Chain = getRoot(); + Ops.push_back(LogTypeId); + Ops.push_back(LogEntryVal); + Ops.push_back(StrSizeVal); + Ops.push_back(Chain); + + // We need to enforce the calling convention for the callsite, so that + // argument ordering is enforced correctly, and that register allocation can + // see that some registers may be assumed clobbered and have to preserve + // them across calls to the intrinsic. 
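Returning to the fshl/fshr expansion a few hunks above: both shift amounts are reduced modulo the bit width, and a zero shift amount has to select X (for fshl) or Y (for fshr) outright, because OR-ing the two shifted halves would yield X | Y in that case. A minimal scalar i32 sketch of the same formula (not the DAG code itself):

#include <cassert>
#include <cstdint>

// Scalar i32 model of the expansion (illustrative only):
//   fshl: (X << (Z % BW)) | (Y >> ((BW - Z) % BW))
// with a zero shift amount returning X, since OR-ing both halves would
// produce X | Y instead.
static uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  const uint32_t BW = 32;
  uint32_t ShAmt = Z % BW;
  uint32_t InvShAmt = (BW - Z) % BW; // NegZ % BW in the DAG expansion
  uint32_t Res = (X << ShAmt) | (Y >> InvShAmt);
  return ShAmt == 0 ? X : Res;
}

int main() {
  // fshl concatenates X:Y and keeps the high word of the left shift.
  assert(fshl32(0x00000001u, 0x80000000u, 1) == 0x00000003u);
  assert(fshl32(0xDEADBEEFu, 0x12345678u, 0) == 0xDEADBEEFu);
  assert(fshl32(0xF000000Fu, 0xF000000Fu, 4) == 0x000000FFu); // rotate case
  return 0;
}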
+ MachineSDNode *MN = DAG.getMachineNode( + TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops); + SDValue patchableNode = SDValue(MN, 0); + DAG.setRoot(patchableNode); + setValue(&I, patchableNode); + return nullptr; + } case Intrinsic::experimental_deoptimize: LowerDeoptimizeCall(&I); return nullptr; @@ -6023,6 +6172,66 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::experimental_vector_reduce_fmin: visitVectorReduce(I, Intrinsic); return nullptr; + + case Intrinsic::icall_branch_funnel: { + SmallVector<SDValue, 16> Ops; + Ops.push_back(DAG.getRoot()); + Ops.push_back(getValue(I.getArgOperand(0))); + + int64_t Offset; + auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset( + I.getArgOperand(1), Offset, DAG.getDataLayout())); + if (!Base) + report_fatal_error( + "llvm.icall.branch.funnel operand must be a GlobalValue"); + Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0)); + + struct BranchFunnelTarget { + int64_t Offset; + SDValue Target; + }; + SmallVector<BranchFunnelTarget, 8> Targets; + + for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) { + auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset( + I.getArgOperand(Op), Offset, DAG.getDataLayout())); + if (ElemBase != Base) + report_fatal_error("all llvm.icall.branch.funnel operands must refer " + "to the same GlobalValue"); + + SDValue Val = getValue(I.getArgOperand(Op + 1)); + auto *GA = dyn_cast<GlobalAddressSDNode>(Val); + if (!GA) + report_fatal_error( + "llvm.icall.branch.funnel operand must be a GlobalValue"); + Targets.push_back({Offset, DAG.getTargetGlobalAddress( + GA->getGlobal(), getCurSDLoc(), + Val.getValueType(), GA->getOffset())}); + } + llvm::sort(Targets.begin(), Targets.end(), + [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) { + return T1.Offset < T2.Offset; + }); + + for (auto &T : Targets) { + Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32)); + Ops.push_back(T.Target); + } + + SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, + getCurSDLoc(), MVT::Other, Ops), + 0); + DAG.setRoot(N); + setValue(&I, N); + HasTailCall = true; + return nullptr; + } + + case Intrinsic::wasm_landingpad_index: { + // TODO store landing pad index in a map, which will be used when generating + // LSDA information + return nullptr; + } } } @@ -6172,7 +6381,10 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. - if (MF.hasEHFunclets()) { + auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + // There is a platform (e.g. wasm) that uses funclet style IR but does not + // actually use outlined funclets and their LSDA info style. + if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) { assert(CLI.CS); WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()), @@ -6630,14 +6842,13 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { const char *RenameFn = nullptr; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { - if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) { - if (unsigned IID = II->getIntrinsicID(F)) { - RenameFn = visitIntrinsicCall(I, IID); - if (!RenameFn) - return; - } - } - if (Intrinsic::ID IID = F->getIntrinsicID()) { + // Is this an LLVM intrinsic or a target-specific intrinsic? 
+ unsigned IID = F->getIntrinsicID(); + if (!IID) + if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) + IID = II->getIntrinsicID(F); + + if (IID) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) return; @@ -6989,27 +7200,37 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, unsigned NumRegs = 1; if (OpInfo.ConstraintVT != MVT::Other) { - // If this is a FP input in an integer register (or visa versa) insert a bit - // cast of the input value. More generally, handle any case where the input - // value disagrees with the register class we plan to stick this in. - if (OpInfo.Type == InlineAsm::isInput && PhysReg.second && + // If this is a FP operand in an integer register (or visa versa), or more + // generally if the operand value disagrees with the register class we plan + // to stick it in, fix the operand type. + // + // If this is an input value, the bitcast to the new type is done now. + // Bitcast for output value is done at the end of visitInlineAsm(). + if ((OpInfo.Type == InlineAsm::isOutput || + OpInfo.Type == InlineAsm::isInput) && + PhysReg.second && !TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)) { // Try to convert to the first EVT that the reg class contains. If the // types are identical size, use a bitcast to convert (e.g. two differing - // vector types). + // vector types). Note: output bitcast is done at the end of + // visitInlineAsm(). MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second); - if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) { - OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, - RegVT, OpInfo.CallOperand); + if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { + // Exclude indirect inputs while they are unsupported because the code + // to perform the load is missing and thus OpInfo.CallOperand still + // refer to the input address rather than the pointed-to value. + if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect) + OpInfo.CallOperand = + DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; + // If the operand is a FP value and we want it in integer registers, + // use the corresponding integer type. This turns an f64 value into + // i64, which can be passed with two i32 values on a 32-bit machine. } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { - // If the input is a FP value and we want it in FP registers, do a - // bitcast to the corresponding integer type. This turns an f64 value - // into i64, which can be passed with two i32 values on a 32-bit - // machine. RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); - OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, - RegVT, OpInfo.CallOperand); + if (OpInfo.Type == InlineAsm::isInput) + OpInfo.CallOperand = + DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } } @@ -7246,7 +7467,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { continue; // If this is a memory input, and if the operand is not indirect, do what we - // need to to provide an address for the memory input. + // need to provide an address for the memory input. 
if (OpInfo.ConstraintType == TargetLowering::C_Memory && !OpInfo.isIndirect) { assert((OpInfo.isMultipleAlternative || @@ -7521,12 +7742,18 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType()); - // If any of the results of the inline asm is a vector, it may have the - // wrong width/num elts. This can happen for register classes that can - // contain multiple different value types. The preg or vreg allocated may - // not have the same VT as was expected. Convert it to the right type - // with bit_convert. - if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { + // If the type of the inline asm call site return value is different but + // has same size as the type of the asm output bitcast it. One example + // of this is for vectors with different width / number of elements. + // This can happen for register classes that can contain multiple + // different value types. The preg or vreg allocated may not have the + // same VT as was expected. + // + // This can also happen for a return value that disagrees with the + // register class it is put in, eg. a double in a general-purpose + // register on a 32-bit machine. + if (ResultType != Val.getValueType() && + ResultType.getSizeInBits() == Val.getValueSizeInBits()) { Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultType, Val); @@ -7581,8 +7808,17 @@ void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS, // Make sure we leave the DAG in a valid state const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - auto VT = TLI.getValueType(DAG.getDataLayout(), CS.getType()); - setValue(CS.getInstruction(), DAG.getUNDEF(VT)); + SmallVector<EVT, 1> ValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs); + + if (ValueVTs.empty()) + return; + + SmallVector<SDValue, 1> Ops; + for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i) + Ops.push_back(DAG.getUNDEF(ValueVTs[i])); + + setValue(CS.getInstruction(), DAG.getMergeValues(Ops, getCurSDLoc())); } void SelectionDAGBuilder::visitVAStart(const CallInst &I) { @@ -7656,7 +7892,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, return DAG.getMergeValues(Ops, SL); } -/// \brief Populate a CallLowerinInfo (into \p CLI) based on the properties of +/// Populate a CallLowerinInfo (into \p CLI) based on the properties of /// the call being lowered. /// /// This is a helper for lowering intrinsics that follow a target calling @@ -7680,7 +7916,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo( TargetLowering::ArgListEntry Entry; Entry.Node = getValue(V); Entry.Ty = V->getType(); - Entry.setAttributes(&CS, ArgIdx); + Entry.setAttributes(&CS, ArgI); Args.push_back(Entry); } @@ -7691,7 +7927,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo( .setIsPatchPoint(IsPatchPoint); } -/// \brief Add a stack map intrinsic call's live variable operands to a stackmap +/// Add a stack map intrinsic call's live variable operands to a stackmap /// or patchpoint target node's operand list. /// /// Constants are converted to TargetConstants purely as an optimization to @@ -7727,7 +7963,7 @@ static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, } } -/// \brief Lower llvm.experimental.stackmap directly to its target opcode. +/// Lower llvm.experimental.stackmap directly to its target opcode. 
void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>, // [live variables...]) @@ -7790,7 +8026,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { FuncInfo.MF->getFrameInfo().setHasStackMap(); } -/// \brief Lower llvm.experimental.patchpoint directly to its target opcode. +/// Lower llvm.experimental.patchpoint directly to its target opcode. void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, const BasicBlock *EHPadBB) { // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, @@ -7954,8 +8190,6 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, FastMathFlags FMF; if (isa<FPMathOperator>(I)) FMF = I.getFastMathFlags(); - SDNodeFlags SDFlags; - SDFlags.setNoNaNs(FMF.noNaNs()); switch (Intrinsic) { case Intrinsic::experimental_vector_reduce_fadd: @@ -7998,10 +8232,10 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1); break; case Intrinsic::experimental_vector_reduce_fmax: - Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags); + Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1); break; case Intrinsic::experimental_vector_reduce_fmin: - Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags); + Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1); break; default: llvm_unreachable("Unhandled vector reduce intrinsic"); @@ -8220,8 +8454,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { else if (Args[i].IsZExt) ExtendKind = ISD::ZERO_EXTEND; - // Conservatively only handle 'returned' on non-vectors for now - if (Args[i].IsReturned && !Op.getValueType().isVector()) { + // Conservatively only handle 'returned' on non-vectors that can be lowered, + // for now. + if (Args[i].IsReturned && !Op.getValueType().isVector() && + CanLowerReturn) { assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues && "unexpected use of 'returned'"); // Before passing 'returned' to the target lowering code, ensure that @@ -8500,7 +8736,8 @@ findArgumentCopyElisionCandidates(const DataLayout &DL, continue; } - DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI << '\n'); + LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI + << '\n'); // Mark this alloca and store for argument copy elision. *Info = StaticAllocaInfo::Elidable; @@ -8541,8 +8778,9 @@ static void tryToElideArgumentCopy( int OldIndex = AllocaIndex; MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo(); if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) { - DEBUG(dbgs() << " argument copy elision failed due to bad fixed stack " - "object size\n"); + LLVM_DEBUG( + dbgs() << " argument copy elision failed due to bad fixed stack " + "object size\n"); return; } unsigned RequiredAlignment = AI->getAlignment(); @@ -8551,16 +8789,16 @@ static void tryToElideArgumentCopy( AI->getAllocatedType()); } if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) { - DEBUG(dbgs() << " argument copy elision failed: alignment of alloca " - "greater than stack argument alignment (" - << RequiredAlignment << " vs " - << MFI.getObjectAlignment(FixedIndex) << ")\n"); + LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca " + "greater than stack argument alignment (" + << RequiredAlignment << " vs " + << MFI.getObjectAlignment(FixedIndex) << ")\n"); return; } // Perform the elision. Delete the old stack object and replace its only use // in the variable info map. 
Mark the stack object as mutable. - DEBUG({ + LLVM_DEBUG({ dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n' << " Replacing frame index " << OldIndex << " with " << FixedIndex << '\n'; @@ -8732,14 +8970,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { "LowerFormalArguments didn't return a valid chain!"); assert(InVals.size() == Ins.size() && "LowerFormalArguments didn't emit the correct number of values!"); - DEBUG({ - for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - assert(InVals[i].getNode() && - "LowerFormalArguments emitted a null value!"); - assert(EVT(Ins[i].VT) == InVals[i].getValueType() && - "LowerFormalArguments emitted a value with the wrong type!"); - } - }); + LLVM_DEBUG({ + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && + "LowerFormalArguments emitted a null value!"); + assert(EVT(Ins[i].VT) == InVals[i].getValueType() && + "LowerFormalArguments emitted a value with the wrong type!"); + } + }); // Update the DAG with the new chain value resulting from argument lowering. DAG.setRoot(NewRoot); @@ -9351,7 +9589,7 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, } BitTestInfo BTI; - std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { + llvm::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { // Sort by probability first, number of bits second, bit mask third. if (a.ExtraProb != b.ExtraProb) return a.ExtraProb > b.ExtraProb; @@ -9550,15 +9788,15 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, // checked first. However, two clusters can have the same probability in // which case their relative ordering is non-deterministic. So we use Low // as a tie-breaker as clusters are guaranteed to never overlap. - std::sort(W.FirstCluster, W.LastCluster + 1, - [](const CaseCluster &a, const CaseCluster &b) { + llvm::sort(W.FirstCluster, W.LastCluster + 1, + [](const CaseCluster &a, const CaseCluster &b) { return a.Prob != b.Prob ? a.Prob > b.Prob : a.Low->getValue().slt(b.Low->getValue()); }); // Rearrange the case blocks so that the last one falls through if possible - // without without changing the order of probabilities. + // without changing the order of probabilities. for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) { --I; if (I->Prob > W.LastCluster->Prob) @@ -9883,8 +10121,8 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster( if (!SwitchPeeled) return SwitchMBB; - DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " << TopCaseProb - << "\n"); + LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " + << TopCaseProb << "\n"); // Record the MBB for the peeled switch statement. 
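The comparator in lowerWorkItem above breaks probability ties with the cluster's low bound precisely because clusters never overlap, so equal-probability clusters always sort into the same order. A self-contained model of that tie-break with illustrative types:

#include <algorithm>
#include <cstdint>
#include <vector>

// Illustrative stand-ins for a case cluster's probability and low bound.
struct Cluster {
  double Prob;
  uint64_t Low;
};

// Sort descending by probability; ties fall back to the unique low bound,
// making the final order deterministic.
static void sortClusters(std::vector<Cluster> &Clusters) {
  std::sort(Clusters.begin(), Clusters.end(),
            [](const Cluster &A, const Cluster &B) {
              return A.Prob != B.Prob ? A.Prob > B.Prob : A.Low < B.Low;
            });
}

int main() {
  std::vector<Cluster> Clusters = {{0.25, 40}, {0.5, 10}, {0.25, 20}};
  sortClusters(Clusters);
  // -> (0.5,10), (0.25,20), (0.25,40): the tie is resolved by Low.
  return 0;
}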
MachineFunction::iterator BBI(SwitchMBB); @@ -9901,10 +10139,11 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster( Clusters.erase(PeeledCaseIt); for (CaseCluster &CC : Clusters) { - DEBUG(dbgs() << "Scale the probablity for one cluster, before scaling: " - << CC.Prob << "\n"); + LLVM_DEBUG( + dbgs() << "Scale the probablity for one cluster, before scaling: " + << CC.Prob << "\n"); CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb); - DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n"); + LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n"); } PeeledCaseProb = TopCaseProb; return PeeledSwitchMBB; @@ -9983,11 +10222,13 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { findJumpTables(Clusters, &SI, DefaultMBB); findBitTestClusters(Clusters, &SI); - DEBUG({ + LLVM_DEBUG({ dbgs() << "Case clusters: "; for (const CaseCluster &C : Clusters) { - if (C.Kind == CC_JumpTable) dbgs() << "JT:"; - if (C.Kind == CC_BitTests) dbgs() << "BT:"; + if (C.Kind == CC_JumpTable) + dbgs() << "JT:"; + if (C.Kind == CC_BitTests) + dbgs() << "BT:"; C.Low->getValue().print(dbgs(), true); if (C.Low != C.High) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 9e7c2bc6821b..e421984b8af2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -21,7 +21,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" @@ -33,6 +32,7 @@ #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -116,9 +116,12 @@ class SelectionDAGBuilder { unsigned getSDNodeOrder() { return SDNodeOrder; } }; + /// DanglingDebugInfoVector - Helper type for DanglingDebugInfoMap. + typedef std::vector<DanglingDebugInfo> DanglingDebugInfoVector; + /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not /// yet seen the referent. We defer handling these until we do see it. - DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap; + DenseMap<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap; public: /// PendingLoads - Loads are not emitted to the program immediately. We bunch @@ -671,6 +674,12 @@ public: /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. SDValue getCopyFromRegs(const Value *V, Type *Ty); + /// If we have dangling debug info that describes \p Variable, or an + /// overlapping part of variable considering the \p Expr, then this method + /// weill drop that debug info as it isn't valid any longer. + void dropDanglingDebugInfo(const DILocalVariable *Variable, + const DIExpression *Expr); + // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, // generate the debug data structures now that we've seen its definition. void resolveDanglingDebugInfo(const Value *V, SDValue Val); @@ -678,6 +687,13 @@ public: SDValue getValue(const Value *V); bool findValue(const Value *V) const; + /// Return the SDNode for the specified IR value if it exists. 
+ SDNode *getNodeForIRValue(const Value *V) { + if (NodeMap.find(V) == NodeMap.end()) + return nullptr; + return NodeMap[V].getNode(); + } + SDValue getNonRegisterValue(const Value *V); SDValue getValueImpl(const Value *V); @@ -696,13 +712,13 @@ public: void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - Instruction::BinaryOps Opc, BranchProbability TW, - BranchProbability FW, bool InvertCond); + Instruction::BinaryOps Opc, BranchProbability TProb, + BranchProbability FProb, bool InvertCond); void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - BranchProbability TW, BranchProbability FW, + BranchProbability TProb, BranchProbability FProb, bool InvertCond); bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases); bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); @@ -774,11 +790,11 @@ public: }; /// Lower \p SLI into a STATEPOINT instruction. - SDValue LowerAsSTATEPOINT(StatepointLoweringInfo &SLI); + SDValue LowerAsSTATEPOINT(StatepointLoweringInfo &SI); // This function is responsible for the whole statepoint lowering process. // It uniformly handles invoke and call statepoints. - void LowerStatepoint(ImmutableStatepoint Statepoint, + void LowerStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB = nullptr); void LowerCallSiteWithDeoptBundle(ImmutableCallSite CS, SDValue Callee, @@ -838,7 +854,7 @@ private: void visitInvoke(const InvokeInst &I); void visitResume(const ResumeInst &I); - void visitBinary(const User &I, unsigned OpCode); + void visitBinary(const User &I, unsigned Opcode); void visitShift(const User &I, unsigned Opcode); void visitAdd(const User &I) { visitBinary(I, ISD::ADD); } void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); } @@ -881,7 +897,7 @@ private: void visitExtractValue(const User &I); void visitInsertValue(const User &I); - void visitLandingPad(const LandingPadInst &I); + void visitLandingPad(const LandingPadInst &LP); void visitGetElementPtr(const User &I); void visitSelect(const User &I); @@ -926,7 +942,7 @@ private: const BasicBlock *EHPadBB = nullptr); // These two are implemented in StatepointLowering.cpp - void visitGCRelocate(const GCRelocateInst &I); + void visitGCRelocate(const GCRelocateInst &Relocate); void visitGCResult(const GCResultInst &I); void visitVectorReduce(const CallInst &I, unsigned Intrinsic); @@ -1036,9 +1052,17 @@ struct RegsForValue { /// Add this value to the specified inlineasm node operand list. This adds the /// code marker, matching input operand index (if applicable), and includes /// the number of values added into it. - void AddInlineAsmOperands(unsigned Kind, bool HasMatching, + void AddInlineAsmOperands(unsigned Code, bool HasMatching, unsigned MatchingIdx, const SDLoc &dl, SelectionDAG &DAG, std::vector<SDValue> &Ops) const; + + /// Check if the total RegCount is greater than one. + bool occupiesMultipleRegs() const { + return std::accumulate(RegCount.begin(), RegCount.end(), 0) > 1; + } + + /// Return a list of registers and their sizes. 
+ SmallVector<std::pair<unsigned, unsigned>, 4> getRegsAndSizes() const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index dd30dc16378c..fa341e8b5fa5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -20,7 +20,6 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -28,18 +27,21 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -85,6 +87,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; + case ISD::ATOMIC_LOAD_CLR: return "AtomicLoadClr"; case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; @@ -176,20 +179,30 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FMAXNAN: return "fmaxnan"; case ISD::FNEG: return "fneg"; case ISD::FSQRT: return "fsqrt"; + case ISD::STRICT_FSQRT: return "strict_fsqrt"; case ISD::FSIN: return "fsin"; + case ISD::STRICT_FSIN: return "strict_fsin"; case ISD::FCOS: return "fcos"; + case ISD::STRICT_FCOS: return "strict_fcos"; case ISD::FSINCOS: return "fsincos"; case ISD::FTRUNC: return "ftrunc"; case ISD::FFLOOR: return "ffloor"; case ISD::FCEIL: return "fceil"; case ISD::FRINT: return "frint"; + case ISD::STRICT_FRINT: return "strict_frint"; case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint"; case ISD::FROUND: return "fround"; case ISD::FEXP: return "fexp"; + case ISD::STRICT_FEXP: return "strict_fexp"; case ISD::FEXP2: return "fexp2"; + case ISD::STRICT_FEXP2: return "strict_fexp2"; case ISD::FLOG: return "flog"; + case ISD::STRICT_FLOG: return "strict_flog"; case ISD::FLOG2: return "flog2"; + case ISD::STRICT_FLOG2: return "strict_flog2"; case ISD::FLOG10: return "flog10"; + case ISD::STRICT_FLOG10: return "strict_flog10"; // Binary operators case ISD::ADD: return "add"; @@ -214,24 +227,31 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ROTL: return "rotl"; case ISD::ROTR: return "rotr"; case ISD::FADD: return "fadd"; + case ISD::STRICT_FADD: return "strict_fadd"; case ISD::FSUB: return "fsub"; + case ISD::STRICT_FSUB: return "strict_fsub"; case ISD::FMUL: return "fmul"; + case ISD::STRICT_FMUL: return 
"strict_fmul"; case ISD::FDIV: return "fdiv"; + case ISD::STRICT_FDIV: return "strict_fdiv"; case ISD::FMA: return "fma"; + case ISD::STRICT_FMA: return "strict_fma"; case ISD::FMAD: return "fmad"; case ISD::FREM: return "frem"; + case ISD::STRICT_FREM: return "strict_frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; case ISD::FCANONICALIZE: return "fcanonicalize"; case ISD::FPOW: return "fpow"; + case ISD::STRICT_FPOW: return "strict_fpow"; case ISD::SMIN: return "smin"; case ISD::SMAX: return "smax"; case ISD::UMIN: return "umin"; case ISD::UMAX: return "umax"; case ISD::FPOWI: return "fpowi"; + case ISD::STRICT_FPOWI: return "strict_fpowi"; case ISD::SETCC: return "setcc"; - case ISD::SETCCE: return "setcce"; case ISD::SETCCCARRY: return "setcccarry"; case ISD::SELECT: return "select"; case ISD::VSELECT: return "vselect"; @@ -366,7 +386,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SETFALSE2: return "setfalse2"; } case ISD::VECREDUCE_FADD: return "vecreduce_fadd"; + case ISD::VECREDUCE_STRICT_FADD: return "vecreduce_strict_fadd"; case ISD::VECREDUCE_FMUL: return "vecreduce_fmul"; + case ISD::VECREDUCE_STRICT_FMUL: return "vecreduce_strict_fmul"; case ISD::VECREDUCE_ADD: return "vecreduce_add"; case ISD::VECREDUCE_MUL: return "vecreduce_mul"; case ISD::VECREDUCE_AND: return "vecreduce_and"; @@ -401,6 +423,32 @@ static Printable PrintNodeId(const SDNode &Node) { }); } +// Print the MMO with more information from the SelectionDAG. +static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, + const MachineFunction *MF, const Module *M, + const MachineFrameInfo *MFI, + const TargetInstrInfo *TII, LLVMContext &Ctx) { + ModuleSlotTracker MST(M); + if (MF) + MST.incorporateFunction(MF->getFunction()); + SmallVector<StringRef, 0> SSNs; + MMO.print(OS, MST, SSNs, Ctx, MFI, TII); +} + +static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, + const SelectionDAG *G) { + if (G) { + const MachineFunction *MF = &G->getMachineFunction(); + return printMemOperand(OS, MMO, MF, MF->getFunction().getParent(), + &MF->getFrameInfo(), G->getSubtarget().getInstrInfo(), + *G->getContext()); + } else { + LLVMContext Ctx; + return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr, + /*MFI=*/nullptr, /*TII=*/nullptr, Ctx); + } +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); } @@ -430,9 +478,6 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasExact()) OS << " exact"; - if (getFlags().hasUnsafeAlgebra()) - OS << " unsafe"; - if (getFlags().hasNoNaNs()) OS << " nnan"; @@ -448,6 +493,12 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasAllowContract()) OS << " contract"; + if (getFlags().hasApproximateFuncs()) + OS << " afn"; + + if (getFlags().hasAllowReassociation()) + OS << " reassoc"; + if (getFlags().hasVectorReduction()) OS << " vector-reduction"; @@ -457,7 +508,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << "Mem:"; for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), e = MN->memoperands_end(); i != e; ++i) { - OS << **i; + printMemOperand(OS, **i, G); if (std::next(i) != e) OS << " "; } @@ -549,7 +600,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << ":" << N->getVT().getEVTString(); } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { - OS << "<" << 
*LD->getMemOperand(); + OS << "<"; + + printMemOperand(OS, *LD->getMemOperand(), G); bool doExt = true; switch (LD->getExtensionType()) { @@ -567,7 +620,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << ">"; } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { - OS << "<" << *ST->getMemOperand(); + OS << "<"; + printMemOperand(OS, *ST->getMemOperand(), G); if (ST->isTruncatingStore()) OS << ", trunc to " << ST->getMemoryVT().getEVTString(); @@ -578,7 +632,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << ">"; } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { - OS << "<" << *M->getMemOperand() << ">"; + OS << "<"; + printMemOperand(OS, *M->getMemOperand(), G); + OS << ">"; } else if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(this)) { int64_t offset = BA->getOffset(); @@ -608,6 +664,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getNodeId() != -1) OS << " [ID=" << getNodeId() << ']'; + if (!(isa<ConstantSDNode>(this) || (isa<ConstantFPSDNode>(this)))) + OS << "# D:" << isDivergent(); if (!G) return; @@ -779,4 +837,8 @@ void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { if (i) OS << ", "; else OS << " "; printOperand(OS, G, getOperand(i)); } + if (DebugLoc DL = getDebugLoc()) { + OS << ", "; + DL.print(OS); + } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index bd9fcfb5c1e8..f7bd8847bee3 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -29,6 +29,7 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -43,7 +44,6 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -82,6 +82,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -196,7 +197,7 @@ defaultListDAGScheduler("default", "Best scheduler for the target", namespace llvm { //===--------------------------------------------------------------------===// - /// \brief This class is used by SelectionDAGISel to temporarily override + /// This class is used by SelectionDAGISel to temporarily override /// the optimization level on a per-function basis. 
class OptLevelChanger { SelectionDAGISel &IS; @@ -211,26 +212,27 @@ namespace llvm { return; IS.OptLevel = NewOptLevel; IS.TM.setOptLevel(NewOptLevel); - DEBUG(dbgs() << "\nChanging optimization level for Function " - << IS.MF->getFunction().getName() << "\n"); - DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel - << " ; After: -O" << NewOptLevel << "\n"); + LLVM_DEBUG(dbgs() << "\nChanging optimization level for Function " + << IS.MF->getFunction().getName() << "\n"); + LLVM_DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O" + << NewOptLevel << "\n"); SavedFastISel = IS.TM.Options.EnableFastISel; if (NewOptLevel == CodeGenOpt::None) { IS.TM.setFastISel(IS.TM.getO0WantsFastISel()); - DEBUG(dbgs() << "\tFastISel is " - << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled") - << "\n"); + LLVM_DEBUG( + dbgs() << "\tFastISel is " + << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled") + << "\n"); } } ~OptLevelChanger() { if (IS.OptLevel == SavedOptLevel) return; - DEBUG(dbgs() << "\nRestoring optimization level for Function " - << IS.MF->getFunction().getName() << "\n"); - DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel - << " ; After: -O" << SavedOptLevel << "\n"); + LLVM_DEBUG(dbgs() << "\nRestoring optimization level for Function " + << IS.MF->getFunction().getName() << "\n"); + LLVM_DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel << " ; After: -O" + << SavedOptLevel << "\n"); IS.OptLevel = SavedOptLevel; IS.TM.setOptLevel(SavedOptLevel); IS.TM.setFastISel(SavedFastISel); @@ -326,9 +328,9 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<GCModuleInfo>(); AU.addRequired<StackProtector>(); - AU.addPreserved<StackProtector>(); AU.addPreserved<GCModuleInfo>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -410,11 +412,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; - DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); + LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI); - CurDAG->init(*MF, *ORE, this); + CurDAG->init(*MF, *ORE, this, LibInfo, + getAnalysisIfAvailable<DivergenceAnalysis>()); FuncInfo->set(Fn, *MF, CurDAG); // Now get the optional analyzes if we want to. @@ -513,8 +516,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // FIXME: VR def may not be in entry block. Def->getParent()->insert(std::next(InsertPos), MI); } else - DEBUG(dbgs() << "Dropping debug info for dead vreg" - << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for dead vreg" + << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); } // If Reg is live-in then update debug info to track its copy in a vreg. @@ -621,8 +624,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // at this point. 
FuncInfo->clear(); - DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n"); - DEBUG(MF->print(dbgs())); + LLVM_DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n"); + LLVM_DEBUG(MF->print(dbgs())); return true; } @@ -711,6 +714,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { int BlockNumber = -1; (void)BlockNumber; bool MatchFilterBB = false; (void)MatchFilterBB; + TargetTransformInfo &TTI = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*FuncInfo->Fn); // Pre-type legalization allow creation of any node types. CurDAG->NewNodesMustHaveLegalTypes = false; @@ -718,7 +723,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG MatchFilterBB = (FilterDAGBasicBlockName.empty() || FilterDAGBasicBlockName == - FuncInfo->MBB->getBasicBlock()->getName().str()); + FuncInfo->MBB->getBasicBlock()->getName()); #endif #ifdef NDEBUG if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || @@ -730,9 +735,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { BlockName = (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str(); } - DEBUG(dbgs() << "Initial selection DAG: " << printMBBReference(*FuncInfo->MBB) - << " '" << BlockName << "'\n"; - CurDAG->dump()); + LLVM_DEBUG(dbgs() << "Initial selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewDAGCombine1 && MatchFilterBB) CurDAG->viewGraph("dag-combine1 input for " + BlockName); @@ -744,10 +750,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel); } - DEBUG(dbgs() << "Optimized lowered selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); + + LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); // Second step, hack on the DAG until it only uses operations and types that // the target supports. @@ -761,10 +770,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } - DEBUG(dbgs() << "Type-legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); + + LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); // Only allow creation of legal node types. 
CurDAG->NewNodesMustHaveLegalTypes = true; @@ -780,10 +792,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel); } - DEBUG(dbgs() << "Optimized type-legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); + + LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); } { @@ -793,10 +808,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } if (Changed) { - DEBUG(dbgs() << "Vector-legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + LLVM_DEBUG(dbgs() << "Vector-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); { NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName, @@ -804,10 +819,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->LegalizeTypes(); } - DEBUG(dbgs() << "Vector/type-legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + LLVM_DEBUG(dbgs() << "Vector/type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewDAGCombineLT && MatchFilterBB) CurDAG->viewGraph("dag-combine-lv input for " + BlockName); @@ -819,10 +834,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel); } - DEBUG(dbgs() << "Optimized vector-legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + LLVM_DEBUG(dbgs() << "Optimized vector-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); + + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); } if (ViewLegalizeDAGs && MatchFilterBB) @@ -834,10 +852,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Legalize(); } - DEBUG(dbgs() << "Legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); + + LLVM_DEBUG(dbgs() << "Legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewDAGCombine2 && MatchFilterBB) CurDAG->viewGraph("dag-combine2 input for " + BlockName); @@ -849,10 +870,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel); } - DEBUG(dbgs() << "Optimized legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); + + LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); @@ -868,10 +892,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DoInstructionSelection(); } - DEBUG(dbgs() << "Selected selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + LLVM_DEBUG(dbgs() << "Selected selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewSchedDAGs && MatchFilterBB) CurDAG->viewGraph("scheduler input 
for " + BlockName); @@ -937,10 +961,62 @@ public: } // end anonymous namespace +// This function is used to enforce the topological node id property +// property leveraged during Instruction selection. Before selection all +// nodes are given a non-negative id such that all nodes have a larger id than +// their operands. As this holds transitively we can prune checks that a node N +// is a predecessor of M another by not recursively checking through M's +// operands if N's ID is larger than M's ID. This is significantly improves +// performance of for various legality checks (e.g. IsLegalToFold / +// UpdateChains). + +// However, when we fuse multiple nodes into a single node +// during selection we may induce a predecessor relationship between inputs and +// outputs of distinct nodes being merged violating the topological property. +// Should a fused node have a successor which has yet to be selected, our +// legality checks would be incorrect. To avoid this we mark all unselected +// sucessor nodes, i.e. id != -1 as invalid for pruning by bit-negating (x => +// (-(x+1))) the ids and modify our pruning check to ignore negative Ids of M. +// We use bit-negation to more clearly enforce that node id -1 can only be +// achieved by selected nodes). As the conversion is reversable the original Id, +// topological pruning can still be leveraged when looking for unselected nodes. +// This method is call internally in all ISel replacement calls. +void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) { + SmallVector<SDNode *, 4> Nodes; + Nodes.push_back(Node); + + while (!Nodes.empty()) { + SDNode *N = Nodes.pop_back_val(); + for (auto *U : N->uses()) { + auto UId = U->getNodeId(); + if (UId > 0) { + InvalidateNodeId(U); + Nodes.push_back(U); + } + } + } +} + +// InvalidateNodeId - As discusses in EnforceNodeIdInvariant, mark a +// NodeId with the equivalent node id which is invalid for topological +// pruning. +void SelectionDAGISel::InvalidateNodeId(SDNode *N) { + int InvalidId = -(N->getNodeId() + 1); + N->setNodeId(InvalidId); +} + +// getUninvalidatedNodeId - get original uninvalidated node id. +int SelectionDAGISel::getUninvalidatedNodeId(SDNode *N) { + int Id = N->getNodeId(); + if (Id < -1) + return -(Id + 1); + return Id; +} + void SelectionDAGISel::DoInstructionSelection() { - DEBUG(dbgs() << "===== Instruction selection begins: " - << printMBBReference(*FuncInfo->MBB) << " '" - << FuncInfo->MBB->getName() << "'\n"); + LLVM_DEBUG(dbgs() << "===== Instruction selection begins: " + << printMBBReference(*FuncInfo->MBB) << " '" + << FuncInfo->MBB->getName() << "'\n"); PreprocessISelDAG(); @@ -972,6 +1048,33 @@ void SelectionDAGISel::DoInstructionSelection() { if (Node->use_empty()) continue; +#ifndef NDEBUG + SmallVector<SDNode *, 4> Nodes; + Nodes.push_back(Node); + + while (!Nodes.empty()) { + auto N = Nodes.pop_back_val(); + if (N->getOpcode() == ISD::TokenFactor || N->getNodeId() < 0) + continue; + for (const SDValue &Op : N->op_values()) { + if (Op->getOpcode() == ISD::TokenFactor) + Nodes.push_back(Op.getNode()); + else { + // We rely on topological ordering of node ids for checking for + // cycles when fusing nodes during selection. All unselected nodes + // successors of an already selected node should have a negative id. + // This assertion will catch such cases. If this assertion triggers + // it is likely you using DAG-level Value/Node replacement functions + // (versus equivalent ISEL replacement) in backend-specific + // selections. 
See comment in EnforceNodeIdInvariant for more + // details. + assert(Op->getNodeId() != -1 && + "Node has already selected predecessor node"); + } + } + } +#endif + // When we are using non-default rounding modes or FP exception behavior // FP operations are represented by StrictFP pseudo-operations. They // need to be simplified here so that the target-specific instruction @@ -985,13 +1088,16 @@ void SelectionDAGISel::DoInstructionSelection() { if (Node->isStrictFPOpcode()) Node = CurDAG->mutateStrictFPToFP(Node); + LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: "; + Node->dump(CurDAG)); + Select(Node); } CurDAG->setRoot(Dummy.getValue()); } - DEBUG(dbgs() << "===== Instruction selection ends:\n"); + LLVM_DEBUG(dbgs() << "\n===== Instruction selection ends:\n"); PostprocessISelDAG(); } @@ -1264,7 +1370,7 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { } auto DLoc = isa<Instruction>(SwiftErrorVal) - ? dyn_cast<Instruction>(SwiftErrorVal)->getDebugLoc() + ? cast<Instruction>(SwiftErrorVal)->getDebugLoc() : DebugLoc(); const auto *TII = FuncInfo->MF->getSubtarget().getInstrInfo(); @@ -1381,7 +1487,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = nullptr; if (TM.Options.EnableFastISel) { - DEBUG(dbgs() << "Enabling fast-isel\n"); + LLVM_DEBUG(dbgs() << "Enabling fast-isel\n"); FastIS = TLI->createFastISel(*FuncInfo, LibInfo); } @@ -1398,6 +1504,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()]; FuncInfo->InsertPt = FuncInfo->MBB->begin(); + CurDAG->setFunctionLoweringInfo(FuncInfo); + if (!FastIS) { LowerArguments(Fn); } else { @@ -1435,6 +1543,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { processDbgDeclares(FuncInfo); // Iterate over all basic blocks in the function. 
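A minimal standalone sketch (not part of the patch above) of the reversible id encoding described in the EnforceNodeIdInvariant comment: plain ints rather than SDNodes, and only positive ids are ever invalidated, so the "selected" marker -1 is never produced by the mapping.

#include <cassert>

// x => -(x+1): maps a positive topological id to a value < -1; applying the
// recovery step gives the original id back.
static int invalidateId(int Id) { return -(Id + 1); }            // e.g. 7 -> -8
static int uninvalidatedId(int Id) { return Id < -1 ? -(Id + 1) : Id; }

int main() {
  for (int Id = 1; Id <= 1000; ++Id) {
    assert(invalidateId(Id) < -1);                    // never collides with -1
    assert(uninvalidatedId(invalidateId(Id)) == Id);  // round-trips
  }
  return 0;
}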
+ StackProtector &SP = getAnalysis<StackProtector>(); for (const BasicBlock *LLVMBB : RPOT) { if (OptLevel != CodeGenOpt::None) { bool AllPredsVisited = true; @@ -1604,7 +1713,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->recomputeInsertPt(); } - if (getAnalysis<StackProtector>().shouldEmitSDCheck(*LLVMBB)) { + if (SP.shouldEmitSDCheck(*LLVMBB)) { bool FunctionBasedInstrumentation = TLI->getSSPStackGuardCheck(*Fn.getParent()); SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB], @@ -1630,11 +1739,15 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end()); } + if (FastIS) + FastIS->finishBasicBlock(); FinishBasicBlock(); FuncInfo->PHINodesToUpdate.clear(); ElidedArgCopyInstrs.clear(); } + SP.copyToMachineFrameInfo(MF->getFrameInfo()); + propagateSwiftErrorVRegs(FuncInfo); delete FastIS; @@ -1728,12 +1841,12 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB) { void SelectionDAGISel::FinishBasicBlock() { - DEBUG(dbgs() << "Total amount of phi nodes to update: " - << FuncInfo->PHINodesToUpdate.size() << "\n"; - for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) - dbgs() << "Node " << i << " : (" - << FuncInfo->PHINodesToUpdate[i].first - << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); + LLVM_DEBUG(dbgs() << "Total amount of phi nodes to update: " + << FuncInfo->PHINodesToUpdate.size() << "\n"; + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; + ++i) dbgs() + << "Node " << i << " : (" << FuncInfo->PHINodesToUpdate[i].first + << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. @@ -2012,7 +2125,7 @@ bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, return true; // If the actual AND mask is allowing unallowed bits, this doesn't match. - if (ActualMask.intersects(~DesiredMask)) + if (!ActualMask.isSubsetOf(DesiredMask)) return false; // Otherwise, the DAG Combiner may have proven that the value coming in is @@ -2041,7 +2154,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, return true; // If the actual AND mask is allowing unallowed bits, this doesn't match. - if (ActualMask.intersects(~DesiredMask)) + if (!ActualMask.isSubsetOf(DesiredMask)) return false; // Otherwise, the DAG Combiner may have proven that the value coming in is @@ -2134,52 +2247,44 @@ static SDNode *findGlueUse(SDNode *N) { return nullptr; } -/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def". -/// This function iteratively traverses up the operand chain, ignoring -/// certain nodes. -static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, - SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited, +/// findNonImmUse - Return true if "Def" is a predecessor of "Root" via a path +/// beyond "ImmedUse". We may ignore chains as they are checked separately. +static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse, bool IgnoreChains) { - // The NodeID's are given uniques ID's where a node ID is guaranteed to be - // greater than all of its (recursive) operands. If we scan to a point where - // 'use' is smaller than the node we're scanning for, then we know we will - // never find it. - // - // The Use may be -1 (unassigned) if it is a newly allocated node. This can - // happen because we scan down to newly selected nodes in the case of glue - // uses. 
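For the CheckAndMask/CheckOrMask hunk above, the rewritten condition restates the old one: a mask intersects the complement of the desired bits exactly when it is not a subset of them. A small illustrative check with llvm::APInt (values chosen only for illustration):

#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

int main() {
  APInt Desired(8, 0x0F);
  // Actual has a bit (0x10) outside Desired: both forms reject it.
  APInt Bad(8, 0x1F);
  assert(Bad.intersects(~Desired) && !Bad.isSubsetOf(Desired));
  // Actual stays within Desired: both forms accept it.
  APInt Good(8, 0x07);
  assert(!Good.intersects(~Desired) && Good.isSubsetOf(Desired));
  return 0;
}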
- std::vector<SDNode *> WorkList; - WorkList.push_back(Use); - - while (!WorkList.empty()) { - Use = WorkList.back(); - WorkList.pop_back(); - if (Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1) - continue; + SmallPtrSet<const SDNode *, 16> Visited; + SmallVector<const SDNode *, 16> WorkList; + // Only check if we have non-immediate uses of Def. + if (ImmedUse->isOnlyUserOf(Def)) + return false; - // Don't revisit nodes if we already scanned it and didn't fail, we know we - // won't fail if we scan it again. - if (!Visited.insert(Use).second) + // We don't care about paths to Def that go through ImmedUse so mark it + // visited and mark non-def operands as used. + Visited.insert(ImmedUse); + for (const SDValue &Op : ImmedUse->op_values()) { + SDNode *N = Op.getNode(); + // Ignore chain deps (they are validated by + // HandleMergeInputChains) and immediate uses + if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def) continue; + if (!Visited.insert(N).second) + continue; + WorkList.push_back(N); + } - for (const SDValue &Op : Use->op_values()) { - // Ignore chain uses, they are validated by HandleMergeInputChains. - if (Op.getValueType() == MVT::Other && IgnoreChains) - continue; - + // Initialize worklist to operands of Root. + if (Root != ImmedUse) { + for (const SDValue &Op : Root->op_values()) { SDNode *N = Op.getNode(); - if (N == Def) { - if (Use == ImmedUse || Use == Root) - continue; // We are not looking for immediate use. - assert(N != Root); - return true; - } - - // Traverse up the operand chain. + // Ignore chains (they are validated by HandleMergeInputChains) + if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def) + continue; + if (!Visited.insert(N).second) + continue; WorkList.push_back(N); } } - return false; + + return SDNode::hasPredecessorHelper(Def, Visited, WorkList, 0, true); } /// IsProfitableToFold - Returns true if it's profitable to fold the specific @@ -2199,7 +2304,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, // If Root use can somehow reach N through a path that that doesn't contain // U then folding N would create a cycle. e.g. In the following - // diagram, Root can reach N through X. If N is folded into into Root, then + // diagram, Root can reach N through X. If N is folded into Root, then // X is both a predecessor and a successor of U. // // [N*] // @@ -2251,13 +2356,12 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, // If our query node has a glue result with a use, we've walked up it. If // the user (which has already been selected) has a chain or indirectly uses - // the chain, our WalkChainUsers predicate will not consider it. Because of + // the chain, HandleMergeInputChains will not consider it. Because of // this, we cannot ignore chains in this predicate. IgnoreChains = false; } - SmallPtrSet<SDNode*, 16> Visited; - return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); + return !findNonImmUse(Root, N.getNode(), U, IgnoreChains); } void SelectionDAGISel::Select_INLINEASM(SDNode *N) { @@ -2360,7 +2464,8 @@ void SelectionDAGISel::UpdateChains( std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N, static_cast<SDNode *>(nullptr)); }); - CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain); + if (ChainNode->getOpcode() != ISD::TokenFactor) + ReplaceUses(ChainVal, InputChain); // If the node became dead and we haven't already seen it, delete it. 
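The cycle rule implemented by the reworked findNonImmUse/IsLegalToFold above can be sketched on a toy graph (hypothetical MiniNode struct, not SDNode, chains and glue ignored): folding Def into Root is rejected exactly when Root reaches Def along a path that bypasses the immediate use.

#include <cassert>
#include <set>
#include <vector>

struct MiniNode { std::vector<MiniNode *> Ops; };

// Is Def reachable from Root other than through ImmedUse's direct use of it?
static bool findNonImmUse(MiniNode *Root, MiniNode *Def, MiniNode *ImmedUse) {
  std::set<MiniNode *> Visited = {ImmedUse};
  std::vector<MiniNode *> Worklist;
  auto Seed = [&](MiniNode *N) {
    for (MiniNode *Op : N->Ops)
      if (Op != Def && Visited.insert(Op).second)
        Worklist.push_back(Op);
  };
  Seed(ImmedUse);
  if (Root != ImmedUse)
    Seed(Root);
  while (!Worklist.empty()) {
    MiniNode *N = Worklist.back();
    Worklist.pop_back();
    for (MiniNode *Op : N->Ops) {
      if (Op == Def)
        return true; // second, non-immediate path to Def: folding would cycle
      if (Visited.insert(Op).second)
        Worklist.push_back(Op);
    }
  }
  return false;
}

int main() {
  MiniNode N, U{{&N}}, X{{&N}};
  MiniNode RootOk{{&U}};      // Root -> U -> N only: fold is legal
  MiniNode RootBad{{&U, &X}}; // Root also reaches N via X: fold is illegal
  assert(!findNonImmUse(&RootOk, &N, &U));
  assert(findNonImmUse(&RootBad, &N, &U));
  return 0;
}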
if (ChainNode != NodeToMatch && ChainNode->use_empty() && @@ -2372,144 +2477,7 @@ void SelectionDAGISel::UpdateChains( if (!NowDeadNodes.empty()) CurDAG->RemoveDeadNodes(NowDeadNodes); - DEBUG(dbgs() << "ISEL: Match complete!\n"); -} - -enum ChainResult { - CR_Simple, - CR_InducesCycle, - CR_LeadsToInteriorNode -}; - -/// WalkChainUsers - Walk down the users of the specified chained node that is -/// part of the pattern we're matching, looking at all of the users we find. -/// This determines whether something is an interior node, whether we have a -/// non-pattern node in between two pattern nodes (which prevent folding because -/// it would induce a cycle) and whether we have a TokenFactor node sandwiched -/// between pattern nodes (in which case the TF becomes part of the pattern). -/// -/// The walk we do here is guaranteed to be small because we quickly get down to -/// already selected nodes "below" us. -static ChainResult -WalkChainUsers(const SDNode *ChainedNode, - SmallVectorImpl<SDNode *> &ChainedNodesInPattern, - DenseMap<const SDNode *, ChainResult> &TokenFactorResult, - SmallVectorImpl<SDNode *> &InteriorChainedNodes) { - ChainResult Result = CR_Simple; - - for (SDNode::use_iterator UI = ChainedNode->use_begin(), - E = ChainedNode->use_end(); UI != E; ++UI) { - // Make sure the use is of the chain, not some other value we produce. - if (UI.getUse().getValueType() != MVT::Other) continue; - - SDNode *User = *UI; - - if (User->getOpcode() == ISD::HANDLENODE) // Root of the graph. - continue; - - // If we see an already-selected machine node, then we've gone beyond the - // pattern that we're selecting down into the already selected chunk of the - // DAG. - unsigned UserOpcode = User->getOpcode(); - if (User->isMachineOpcode() || - UserOpcode == ISD::CopyToReg || - UserOpcode == ISD::CopyFromReg || - UserOpcode == ISD::INLINEASM || - UserOpcode == ISD::EH_LABEL || - UserOpcode == ISD::LIFETIME_START || - UserOpcode == ISD::LIFETIME_END) { - // If their node ID got reset to -1 then they've already been selected. - // Treat them like a MachineOpcode. - if (User->getNodeId() == -1) - continue; - } - - // If we have a TokenFactor, we handle it specially. - if (User->getOpcode() != ISD::TokenFactor) { - // If the node isn't a token factor and isn't part of our pattern, then it - // must be a random chained node in between two nodes we're selecting. - // This happens when we have something like: - // x = load ptr - // call - // y = x+4 - // store y -> ptr - // Because we structurally match the load/store as a read/modify/write, - // but the call is chained between them. We cannot fold in this case - // because it would induce a cycle in the graph. - if (!std::count(ChainedNodesInPattern.begin(), - ChainedNodesInPattern.end(), User)) - return CR_InducesCycle; - - // Otherwise we found a node that is part of our pattern. For example in: - // x = load ptr - // y = x+4 - // store y -> ptr - // This would happen when we're scanning down from the load and see the - // store as a user. Record that there is a use of ChainedNode that is - // part of the pattern and keep scanning uses. - Result = CR_LeadsToInteriorNode; - InteriorChainedNodes.push_back(User); - continue; - } - - // If we found a TokenFactor, there are two cases to consider: first if the - // TokenFactor is just hanging "below" the pattern we're matching (i.e. no - // uses of the TF are in our pattern) we just want to ignore it. 
Second, - // the TokenFactor can be sandwiched in between two chained nodes, like so: - // [Load chain] - // ^ - // | - // [Load] - // ^ ^ - // | \ DAG's like cheese - // / \ do you? - // / | - // [TokenFactor] [Op] - // ^ ^ - // | | - // \ / - // \ / - // [Store] - // - // In this case, the TokenFactor becomes part of our match and we rewrite it - // as a new TokenFactor. - // - // To distinguish these two cases, do a recursive walk down the uses. - auto MemoizeResult = TokenFactorResult.find(User); - bool Visited = MemoizeResult != TokenFactorResult.end(); - // Recursively walk chain users only if the result is not memoized. - if (!Visited) { - auto Res = WalkChainUsers(User, ChainedNodesInPattern, TokenFactorResult, - InteriorChainedNodes); - MemoizeResult = TokenFactorResult.insert(std::make_pair(User, Res)).first; - } - switch (MemoizeResult->second) { - case CR_Simple: - // If the uses of the TokenFactor are just already-selected nodes, ignore - // it, it is "below" our pattern. - continue; - case CR_InducesCycle: - // If the uses of the TokenFactor lead to nodes that are not part of our - // pattern that are not selected, folding would turn this into a cycle, - // bail out now. - return CR_InducesCycle; - case CR_LeadsToInteriorNode: - break; // Otherwise, keep processing. - } - - // Okay, we know we're in the interesting interior case. The TokenFactor - // is now going to be considered part of the pattern so that we rewrite its - // uses (it may have uses that are not part of the pattern) with the - // ultimate chain result of the generated code. We will also add its chain - // inputs as inputs to the ultimate TokenFactor we create. - Result = CR_LeadsToInteriorNode; - if (!Visited) { - ChainedNodesInPattern.push_back(User); - InteriorChainedNodes.push_back(User); - } - } - - return Result; + LLVM_DEBUG(dbgs() << "ISEL: Match complete!\n"); } /// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains @@ -2521,47 +2489,56 @@ WalkChainUsers(const SDNode *ChainedNode, static SDValue HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, SelectionDAG *CurDAG) { - // Used for memoization. Without it WalkChainUsers could take exponential - // time to run. - DenseMap<const SDNode *, ChainResult> TokenFactorResult; - // Walk all of the chained nodes we've matched, recursively scanning down the - // users of the chain result. This adds any TokenFactor nodes that are caught - // in between chained nodes to the chained and interior nodes list. - SmallVector<SDNode*, 3> InteriorChainedNodes; - for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { - if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched, - TokenFactorResult, - InteriorChainedNodes) == CR_InducesCycle) - return SDValue(); // Would induce a cycle. - } - // Okay, we have walked all the matched nodes and collected TokenFactor nodes - // that we are interested in. Form our input TokenFactor node. + SmallPtrSet<const SDNode *, 16> Visited; + SmallVector<const SDNode *, 8> Worklist; SmallVector<SDValue, 3> InputChains; - for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { - // Add the input chain of this node to the InputChains list (which will be - // the operands of the generated TokenFactor) if it's not an interior node. - SDNode *N = ChainNodesMatched[i]; - if (N->getOpcode() != ISD::TokenFactor) { - if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N)) - continue; + unsigned int Max = 8192; - // Otherwise, add the input chain. 
- SDValue InChain = ChainNodesMatched[i]->getOperand(0); - assert(InChain.getValueType() == MVT::Other && "Not a chain"); - InputChains.push_back(InChain); - continue; - } + // Quick exit on trivial merge. + if (ChainNodesMatched.size() == 1) + return ChainNodesMatched[0]->getOperand(0); - // If we have a token factor, we want to add all inputs of the token factor - // that are not part of the pattern we're matching. - for (const SDValue &Op : N->op_values()) { - if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(), - Op.getNode())) - InputChains.push_back(Op); - } + // Add chains that aren't already added (internal). Peek through + // token factors. + std::function<void(const SDValue)> AddChains = [&](const SDValue V) { + if (V.getValueType() != MVT::Other) + return; + if (V->getOpcode() == ISD::EntryToken) + return; + if (!Visited.insert(V.getNode()).second) + return; + if (V->getOpcode() == ISD::TokenFactor) { + for (const SDValue &Op : V->op_values()) + AddChains(Op); + } else + InputChains.push_back(V); + }; + + for (auto *N : ChainNodesMatched) { + Worklist.push_back(N); + Visited.insert(N); } + while (!Worklist.empty()) + AddChains(Worklist.pop_back_val()->getOperand(0)); + + // Skip the search if there are no chain dependencies. + if (InputChains.size() == 0) + return CurDAG->getEntryNode(); + + // If one of these chains is a successor of input, we must have a + // node that is both the predecessor and successor of the + // to-be-merged nodes. Fail. + Visited.clear(); + for (SDValue V : InputChains) + Worklist.push_back(V.getNode()); + + for (auto *N : ChainNodesMatched) + if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true)) + return SDValue(); + + // Return merged chain. if (InputChains.size() == 1) return InputChains[0]; return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]), @@ -2606,8 +2583,8 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, // Move the glue if needed. if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 && (unsigned)OldGlueResultNo != ResNumResults-1) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo), - SDValue(Res, ResNumResults-1)); + ReplaceUses(SDValue(Node, OldGlueResultNo), + SDValue(Res, ResNumResults - 1)); if ((EmitNodeInfo & OPFL_GlueOutput) != 0) --ResNumResults; @@ -2615,14 +2592,15 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, // Move the chain reference if needed. if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 && (unsigned)OldChainResultNo != ResNumResults-1) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo), - SDValue(Res, ResNumResults-1)); + ReplaceUses(SDValue(Node, OldChainResultNo), + SDValue(Res, ResNumResults - 1)); // Otherwise, no replacement happened because the node already exists. Replace // Uses of the old node with the new one. if (Res != Node) { - CurDAG->ReplaceAllUsesWith(Node, Res); - CurDAG->RemoveDeadNode(Node); + ReplaceNode(Node, Res); + } else { + EnforceNodeIdInvariant(Res); } return Res; @@ -2861,7 +2839,7 @@ struct MatchScope { bool HasChainNodesMatched; }; -/// \\brief A DAG update listener to keep the matching state +/// \A DAG update listener to keep the matching state /// (i.e. RecordedNodes and MatchScope) uptodate if the target is allowed to /// change the DAG while matching. X86 addressing mode matcher is an example /// for this. 
@@ -2939,8 +2917,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, return; case ISD::AssertSext: case ISD::AssertZext: - CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0), - NodeToMatch->getOperand(0)); + ReplaceUses(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0)); CurDAG->RemoveDeadNode(NodeToMatch); return; case ISD::INLINEASM: @@ -2988,9 +2965,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, // update the chain results when the pattern is complete. SmallVector<SDNode*, 3> ChainNodesMatched; - DEBUG(dbgs() << "ISEL: Starting pattern match on root node: "; - NodeToMatch->dump(CurDAG); - dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "ISEL: Starting pattern match\n"); // Determine where to start the interpreter. Normally we start at opcode #0, // but if the state machine starts with an OPC_SwitchOpcode, then we @@ -3002,7 +2977,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, // Already computed the OpcodeOffset table, just index into it. if (N.getOpcode() < OpcodeOffset.size()) MatcherIndex = OpcodeOffset[N.getOpcode()]; - DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n"); + LLVM_DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n"); } else if (MatcherTable[0] == OPC_SwitchOpcode) { // Otherwise, the table isn't computed, but the state machine does start @@ -3069,9 +3044,10 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, if (!Result) break; - DEBUG(dbgs() << " Skipped scope entry (due to false predicate) at " - << "index " << MatcherIndexOfPredicate - << ", continuing at " << FailIndex << "\n"); + LLVM_DEBUG( + dbgs() << " Skipped scope entry (due to false predicate) at " + << "index " << MatcherIndexOfPredicate << ", continuing at " + << FailIndex << "\n"); ++NumDAGIselRetries; // Otherwise, we know that this case of the Scope is guaranteed to fail, @@ -3120,11 +3096,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, if (auto *MN = dyn_cast<MemSDNode>(N)) MatchedMemRefs.push_back(MN->getMemOperand()); else { - DEBUG( - dbgs() << "Expected MemSDNode "; - N->dump(CurDAG); - dbgs() << '\n' - ); + LLVM_DEBUG(dbgs() << "Expected MemSDNode "; N->dump(CurDAG); + dbgs() << '\n'); } continue; @@ -3245,8 +3218,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, if (CaseSize == 0) break; // Otherwise, execute the case we found. - DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart - << " to " << MatcherIndex << "\n"); + LLVM_DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart << " to " + << MatcherIndex << "\n"); continue; } @@ -3277,8 +3250,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, if (CaseSize == 0) break; // Otherwise, execute the case we found. - DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() - << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); + LLVM_DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() + << "] from " << SwitchStart << " to " << MatcherIndex + << '\n'); continue; } case OPC_CheckChild0Type: case OPC_CheckChild1Type: @@ -3658,16 +3632,11 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, Res->setMemRefs(MemRefs, MemRefs + NumMemRefs); } - DEBUG( - if (!MatchedMemRefs.empty() && Res->memoperands_empty()) - dbgs() << " Dropping mem operands\n"; - dbgs() << " " - << (IsMorphNodeTo ? 
"Morphed" : "Created") - << " node: "; - Res->dump(CurDAG); - - dbgs() << '\n'; - ); + LLVM_DEBUG(if (!MatchedMemRefs.empty() && Res->memoperands_empty()) dbgs() + << " Dropping mem operands\n"; + dbgs() << " " << (IsMorphNodeTo ? "Morphed" : "Created") + << " node: "; + Res->dump(CurDAG);); // If this was a MorphNodeTo then we're completely done! if (IsMorphNodeTo) { @@ -3702,7 +3671,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, NodeToMatch->getValueType(i).getSizeInBits() == Res.getValueSizeInBits()) && "invalid replacement"); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); + ReplaceUses(SDValue(NodeToMatch, i), Res); } // Update chain uses. @@ -3715,8 +3684,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, if (NodeToMatch->getValueType(NodeToMatch->getNumValues() - 1) == MVT::Glue && InputGlue.getNode()) - CurDAG->ReplaceAllUsesOfValueWith( - SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), InputGlue); + ReplaceUses(SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), + InputGlue); assert(NodeToMatch->use_empty() && "Didn't replace all uses of the node?"); @@ -3729,7 +3698,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, // If the code reached this point, then the match failed. See if there is // another child to try in the current 'Scope', otherwise pop it until we // find a case to check. - DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex << "\n"); + LLVM_DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex + << "\n"); ++NumDAGIselRetries; while (true) { if (MatchScopes.empty()) { @@ -3749,7 +3719,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, MatchedMemRefs.resize(LastScope.NumMatchedMemRefs); MatcherIndex = LastScope.FailIndex; - DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n"); + LLVM_DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n"); InputChain = LastScope.InputChain; InputGlue = LastScope.InputGlue; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index be4ab094bf49..3b19bff4743d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -229,7 +229,7 @@ bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet if (level >= 20) { if (!printed) { printed = true; - DEBUG(dbgs() << "setSubgraphColor hit max level\n"); + LLVM_DEBUG(dbgs() << "setSubgraphColor hit max level\n"); } return true; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 3f64b49e3555..5cf06e62b80c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -28,7 +28,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -43,6 +42,7 @@ #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include <cassert> diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 
b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index d76e52d78870..fa867fcec366 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -20,7 +20,6 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" @@ -32,6 +31,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include <cctype> using namespace llvm; @@ -96,7 +96,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, return true; } -/// \brief Set CallLoweringInfo attribute flags based on a call instruction +/// Set CallLoweringInfo attribute flags based on a call instruction /// and called function attributes. void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS, unsigned ArgIdx) { @@ -524,6 +524,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } // Other users may use these bits. + EVT VT = Op.getValueType(); if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) { if (Depth != 0) { // If not at the root, Just compute the Known bits to @@ -537,7 +538,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } else if (DemandedMask == 0) { // Not demanding any bits from Op. if (!Op.isUndef()) - return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType())); + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); return false; } else if (Depth == 6) { // Limit search depth. return false; @@ -580,7 +581,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownBits LHSKnown; // Do not increment Depth here; that can cause an infinite loop. TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth); - // If the LHS already has zeros where RHSC does, this and is dead. + // If the LHS already has zeros where RHSC does, this 'and' is dead. if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op0); @@ -596,8 +597,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1 if (isBitwiseNot(Op0) && Op0.hasOneUse() && LHSKnown.One == ~RHSC->getAPIntValue()) { - SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(), - Op0.getOperand(0), Op.getOperand(1)); + SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), + Op.getOperand(1)); return TLO.CombineTo(Op, Xor); } } @@ -618,7 +619,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, Op.getOperand(1)); // If all of the demanded bits in the inputs are known zeros, return zero. if (NewMask.isSubsetOf(Known.Zero | Known2.Zero)) - return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType())); + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT)); // If the RHS is a constant, see if we can simplify it. if (ShrinkDemandedConstant(Op, ~Known2.Zero & NewMask, TLO)) return true; @@ -680,7 +681,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // (but not both) turn this into an *inclusive* or. // e.g. 
(A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 if ((NewMask & ~Known.Zero & ~Known2.Zero) == 0) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1))); @@ -696,7 +697,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // NB: it is okay if more bits are known than are requested if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // all known on one side if (Known.One == Known2.One) { // set bits are the same on both sides - EVT VT = Op.getValueType(); SDValue ANDC = TLO.DAG.getConstant(~Known.One & NewMask, dl, VT); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), ANDC)); @@ -710,7 +710,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (C && !C->isAllOnesValue()) { if (NewMask.isSubsetOf(C->getAPIntValue())) { // We're flipping all demanded bits. Flip the undemanded bits too. - SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), Op.getValueType()); + SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), VT); return TLO.CombineTo(Op, New); } // If we can't turn this into a 'not', try to shrink the constant. @@ -761,7 +761,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // width as the setcc result, and (3) the result of a setcc conforms to 0 or // -1, we may be able to bypass the setcc. if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth && - getBooleanContents(Op.getValueType()) == + getBooleanContents(VT) == BooleanContent::ZeroOrNegativeOneBooleanContent) { // If we're testing X < 0, then this compare isn't needed - just use X! // FIXME: We're limiting to integer types here, but this should also work @@ -807,7 +807,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); - EVT VT = Op.getValueType(); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); @@ -835,8 +834,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, TLO.DAG.getConstant(ShAmt, dl, ShTy)); return TLO.CombineTo(Op, - TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), - NarrowShl)); + TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl)); } // Repeat the SHL optimization above in cases where an extension // intervenes: (shl (anyext (shr x, c1)), c2) to @@ -854,7 +852,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, Op.getOperand(1).getValueType()); - EVT VT = Op.getValueType(); SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, InnerOp.getOperand(0)); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, @@ -904,7 +901,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); - EVT VT = Op.getValueType(); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); @@ -930,12 +926,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // the shift amount is >= the size of the datatype, which is undefined. if (NewMask.isOneValue()) return TLO.CombineTo(Op, - TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), - Op.getOperand(0), Op.getOperand(1))); + TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), + Op.getOperand(1))); if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) { - EVT VT = Op.getValueType(); - // If the shift count is an invalid immediate, don't do anything. 
if (SA->getAPIntValue().uge(BitWidth)) break; @@ -1000,14 +994,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (!AlreadySignExtended) { // Compute the correct shift amount type, which must be getShiftAmountTy // for scalar types after legalization. - EVT ShiftAmtTy = Op.getValueType(); + EVT ShiftAmtTy = VT; if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL); SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, - Op.getValueType(), InOp, + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, InOp, ShiftAmt)); } } @@ -1072,8 +1065,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If none of the top bits are demanded, convert this into an any_extend. if (NewMask.getActiveBits() <= OperandBitWidth) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, - Op.getValueType(), + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op.getOperand(0))); APInt InMask = NewMask.trunc(OperandBitWidth); @@ -1089,8 +1081,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If none of the top bits are demanded, convert this into an any_extend. if (NewMask.getActiveBits() <= InBits) - return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, - Op.getValueType(), + return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op.getOperand(0))); // Since some of the sign extended bits are demanded, we know that the sign @@ -1107,8 +1098,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the sign bit is known zero, convert this to a zero extend. if (Known.isNonNegative()) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, - Op.getValueType(), + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0))); break; } @@ -1139,8 +1129,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::SRL: // Shrink SRL by a constant if none of the high bits shifted in are // demanded. - if (TLO.LegalTypes() && - !isTypeDesirableForOp(ISD::SRL, Op.getValueType())) + if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT)) // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is // undesirable. break; @@ -1150,8 +1139,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue Shift = In.getOperand(1); if (TLO.LegalTypes()) { uint64_t ShVal = ShAmt->getZExtValue(); - Shift = TLO.DAG.getConstant(ShVal, dl, - getShiftAmountTy(Op.getValueType(), DL)); + Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL)); } if (ShAmt->getZExtValue() < BitWidth) { @@ -1163,12 +1151,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (!(HighBits & NewMask)) { // None of the shifted in bits are needed. Add a truncate of the // shift input, then shift it. - SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, - Op.getValueType(), + SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, In.getOperand(0)); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, - Op.getValueType(), - NewTrunc, + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift)); } } @@ -1182,9 +1167,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::AssertZext: { // AssertZext demands all of the high bits, plus any of the low bits // demanded by its users. 
- EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - APInt InMask = APInt::getLowBitsSet(BitWidth, - VT.getSizeInBits()); + EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits()); if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask, Known, TLO, Depth+1)) return true; @@ -1196,40 +1180,45 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::BITCAST: // If this is an FP->Int bitcast and if the sign bit is the only // thing demanded, turn this into a FGETSIGN. - if (!TLO.LegalOperations() && - !Op.getValueType().isVector() && + if (!TLO.LegalOperations() && !VT.isVector() && !Op.getOperand(0).getValueType().isVector() && NewMask == APInt::getSignMask(Op.getValueSizeInBits()) && Op.getOperand(0).getValueType().isFloatingPoint()) { - bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); + bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); - if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() && + if ((OpVTLegal || i32Legal) && VT.isSimple() && + Op.getOperand(0).getValueType() != MVT::f16 && Op.getOperand(0).getValueType() != MVT::f128) { // Cannot eliminate/lower SHL for f128 yet. - EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32; + EVT Ty = OpVTLegal ? VT : MVT::i32; // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0)); unsigned OpVTSizeInBits = Op.getValueSizeInBits(); if (!OpVTLegal && OpVTSizeInBits > 32) - Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign); + Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign); unsigned ShVal = Op.getValueSizeInBits() - 1; - SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType()); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, - Op.getValueType(), - Sign, ShAmt)); + SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt)); } } + // If this is a bitcast, let computeKnownBits handle it. Only do this on a + // recursive call where Known may be useful to the caller. + if (Depth > 0) { + TLO.DAG.computeKnownBits(Op, Known, Depth); + return false; + } break; case ISD::ADD: case ISD::MUL: case ISD::SUB: { // Add, Sub, and Mul don't demand any bits in positions beyond that // of the highest bit demanded of them. - APInt LoMask = APInt::getLowBitsSet(BitWidth, - BitWidth - NewMask.countLeadingZeros()); - if (SimplifyDemandedBits(Op.getOperand(0), LoMask, Known2, TLO, Depth+1) || - SimplifyDemandedBits(Op.getOperand(1), LoMask, Known2, TLO, Depth+1) || + SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); + unsigned NewMaskLZ = NewMask.countLeadingZeros(); + APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - NewMaskLZ); + if (SimplifyDemandedBits(Op0, LoMask, Known2, TLO, Depth + 1) || + SimplifyDemandedBits(Op1, LoMask, Known2, TLO, Depth + 1) || // See if the operation should be performed at a smaller bit width. ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) { SDNodeFlags Flags = Op.getNode()->getFlags(); @@ -1238,13 +1227,33 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // won't wrap after simplification. 
Flags.setNoSignedWrap(false); Flags.setNoUnsignedWrap(false); - SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), - Op.getOperand(0), Op.getOperand(1), + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags); return TLO.CombineTo(Op, NewOp); } return true; } + + // If we have a constant operand, we may be able to turn it into -1 if we + // do not demand the high bits. This can make the constant smaller to + // encode, allow more general folding, or match specialized instruction + // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that + // is probably not useful (and could be detrimental). + ConstantSDNode *C = isConstOrConstSplat(Op1); + APInt HighMask = APInt::getHighBitsSet(NewMask.getBitWidth(), NewMaskLZ); + if (C && !C->isAllOnesValue() && !C->isOne() && + (C->getAPIntValue() | HighMask).isAllOnesValue()) { + SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT); + // We can't guarantee that the new math op doesn't wrap, so explicitly + // clear those flags to prevent folding with a potential existing node + // that has those flags set. + SDNodeFlags Flags; + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags); + return TLO.CombineTo(Op, NewOp); + } + LLVM_FALLTHROUGH; } default: @@ -1265,10 +1274,384 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (C->isOpaque()) return false; } - return TLO.CombineTo(Op, - TLO.DAG.getConstant(Known.One, dl, Op.getValueType())); + return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT)); + } + + return false; +} + +bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op, + const APInt &DemandedElts, + APInt &KnownUndef, + APInt &KnownZero, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + + bool Simplified = + SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO); + if (Simplified) + DCI.CommitTargetLoweringOpt(TLO); + return Simplified; +} + +bool TargetLowering::SimplifyDemandedVectorElts( + SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, + APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth, + bool AssumeSingleUse) const { + EVT VT = Op.getValueType(); + APInt DemandedElts = DemandedEltMask; + unsigned NumElts = DemandedElts.getBitWidth(); + assert(VT.isVector() && "Expected vector op"); + assert(VT.getVectorNumElements() == NumElts && + "Mask size mismatches value type element count!"); + + KnownUndef = KnownZero = APInt::getNullValue(NumElts); + + // Undef operand. + if (Op.isUndef()) { + KnownUndef.setAllBits(); + return false; + } + + // If Op has other users, assume that all elements are needed. + if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) + DemandedElts.setAllBits(); + + // Not demanding any elements from Op. + if (DemandedElts == 0) { + KnownUndef.setAllBits(); + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); + } + + // Limit search depth. + if (Depth >= 6) + return false; + + SDLoc DL(Op); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + + switch (Op.getOpcode()) { + case ISD::SCALAR_TO_VECTOR: { + if (!DemandedElts[0]) { + KnownUndef.setAllBits(); + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); + } + KnownUndef.setHighBits(NumElts - 1); + break; + } + case ISD::BITCAST: { + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + + // We only handle vectors here. 
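A worked instance of the "turn the constant into -1" rewrite in the SimplifyDemandedBits ADD/SUB hunk above: with only the low 4 bits demanded, adding 0x0F is indistinguishable from adding 0xFF (i.e. -1), and the latter exposes x - 1, the shape that patterns such as x86 'blsr' (x & (x - 1)) look for. A hypothetical exhaustive check, not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t DemandedMask = 0x0F; // only the low 4 bits are demanded
  for (unsigned v = 0; v < 256; ++v) {
    uint8_t X = static_cast<uint8_t>(v);
    uint8_t WithC = static_cast<uint8_t>(X + 0x0F);    // original constant
    uint8_t WithNeg1 = static_cast<uint8_t>(X + 0xFF); // rewritten to -1
    assert((WithC & DemandedMask) == (WithNeg1 & DemandedMask));
  }
  return 0;
}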
+ // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits? + if (!SrcVT.isVector()) + break; + + // Fast handling of 'identity' bitcasts. + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + if (NumSrcElts == NumElts) + return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, + KnownZero, TLO, Depth + 1); + + APInt SrcZero, SrcUndef; + APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts); + + // Bitcast from 'large element' src vector to 'small element' vector, we + // must demand a source element if any DemandedElt maps to it. + if ((NumElts % NumSrcElts) == 0) { + unsigned Scale = NumElts / NumSrcElts; + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) + SrcDemandedElts.setBit(i / Scale); + + if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero, + TLO, Depth + 1)) + return true; + + // If the src element is zero/undef then all the output elements will be - + // only demanded elements are guaranteed to be correct. + for (unsigned i = 0; i != NumSrcElts; ++i) { + if (SrcDemandedElts[i]) { + if (SrcZero[i]) + KnownZero.setBits(i * Scale, (i + 1) * Scale); + if (SrcUndef[i]) + KnownUndef.setBits(i * Scale, (i + 1) * Scale); + } + } + } + + // Bitcast from 'small element' src vector to 'large element' vector, we + // demand all smaller source elements covered by the larger demanded element + // of this vector. + if ((NumSrcElts % NumElts) == 0) { + unsigned Scale = NumSrcElts / NumElts; + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) + SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale); + + if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero, + TLO, Depth + 1)) + return true; + + // If all the src elements covering an output element are zero/undef, then + // the output element will be as well, assuming it was demanded. + for (unsigned i = 0; i != NumElts; ++i) { + if (DemandedElts[i]) { + if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue()) + KnownZero.setBit(i); + if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue()) + KnownUndef.setBit(i); + } + } + } + break; + } + case ISD::BUILD_VECTOR: { + // Check all elements and simplify any unused elements with UNDEF. + if (!DemandedElts.isAllOnesValue()) { + // Don't simplify BROADCASTS. 
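For the BITCAST case of SimplifyDemandedVectorElts above, a small sketch of the large-to-small element mapping, assuming an illustrative bitcast from a 4-element source to an 8-element result (Scale = 2): demanding result element i demands source element i / Scale.

#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

int main() {
  const unsigned NumElts = 8, NumSrcElts = 4;  // e.g. v4i32 -> v8i16
  const unsigned Scale = NumElts / NumSrcElts; // 2 result elts per src elt
  APInt DemandedElts(NumElts, 0);
  DemandedElts.setBit(5);                      // demand only result element 5
  APInt SrcDemandedElts(NumSrcElts, 0);
  for (unsigned i = 0; i != NumElts; ++i)
    if (DemandedElts[i])
      SrcDemandedElts.setBit(i / Scale);       // element 5 maps to source element 2
  assert(SrcDemandedElts == APInt(NumSrcElts, 1u << 2));
  return 0;
}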
+ if (llvm::any_of(Op->op_values(), + [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) { + SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end()); + bool Updated = false; + for (unsigned i = 0; i != NumElts; ++i) { + if (!DemandedElts[i] && !Ops[i].isUndef()) { + Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType()); + KnownUndef.setBit(i); + Updated = true; + } + } + if (Updated) + return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops)); + } + } + for (unsigned i = 0; i != NumElts; ++i) { + SDValue SrcOp = Op.getOperand(i); + if (SrcOp.isUndef()) { + KnownUndef.setBit(i); + } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() && + (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) { + KnownZero.setBit(i); + } + } + break; + } + case ISD::CONCAT_VECTORS: { + EVT SubVT = Op.getOperand(0).getValueType(); + unsigned NumSubVecs = Op.getNumOperands(); + unsigned NumSubElts = SubVT.getVectorNumElements(); + for (unsigned i = 0; i != NumSubVecs; ++i) { + SDValue SubOp = Op.getOperand(i); + APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts); + APInt SubUndef, SubZero; + if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO, + Depth + 1)) + return true; + KnownUndef.insertBits(SubUndef, i * NumSubElts); + KnownZero.insertBits(SubZero, i * NumSubElts); + } + break; + } + case ISD::INSERT_SUBVECTOR: { + if (!isa<ConstantSDNode>(Op.getOperand(2))) + break; + SDValue Base = Op.getOperand(0); + SDValue Sub = Op.getOperand(1); + EVT SubVT = Sub.getValueType(); + unsigned NumSubElts = SubVT.getVectorNumElements(); + const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue(); + if (Idx.uge(NumElts - NumSubElts)) + break; + unsigned SubIdx = Idx.getZExtValue(); + APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx); + APInt SubUndef, SubZero; + if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO, + Depth + 1)) + return true; + APInt BaseElts = DemandedElts; + BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx); + if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO, + Depth + 1)) + return true; + KnownUndef.insertBits(SubUndef, SubIdx); + KnownZero.insertBits(SubZero, SubIdx); + break; + } + case ISD::EXTRACT_SUBVECTOR: { + if (!isa<ConstantSDNode>(Op.getOperand(1))) + break; + SDValue Src = Op.getOperand(0); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue(); + if (Idx.uge(NumSrcElts - NumElts)) + break; + // Offset the demanded elts by the subvector index. + uint64_t SubIdx = Idx.getZExtValue(); + APInt SrcElts = DemandedElts.zext(NumSrcElts).shl(SubIdx); + APInt SrcUndef, SrcZero; + if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO, + Depth + 1)) + return true; + KnownUndef = SrcUndef.extractBits(NumElts, SubIdx); + KnownZero = SrcZero.extractBits(NumElts, SubIdx); + break; + } + case ISD::INSERT_VECTOR_ELT: { + SDValue Vec = Op.getOperand(0); + SDValue Scl = Op.getOperand(1); + auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + + // For a legal, constant insertion index, if we don't need this insertion + // then strip it, else remove it from the demanded elts. 
+ if (CIdx && CIdx->getAPIntValue().ult(NumElts)) { + unsigned Idx = CIdx->getZExtValue(); + if (!DemandedElts[Idx]) + return TLO.CombineTo(Op, Vec); + DemandedElts.clearBit(Idx); + + if (SimplifyDemandedVectorElts(Vec, DemandedElts, KnownUndef, + KnownZero, TLO, Depth + 1)) + return true; + + KnownUndef.clearBit(Idx); + if (Scl.isUndef()) + KnownUndef.setBit(Idx); + + KnownZero.clearBit(Idx); + if (isNullConstant(Scl) || isNullFPConstant(Scl)) + KnownZero.setBit(Idx); + break; + } + + APInt VecUndef, VecZero; + if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO, + Depth + 1)) + return true; + // Without knowing the insertion index we can't set KnownUndef/KnownZero. + break; + } + case ISD::VSELECT: { + APInt DemandedLHS(DemandedElts); + APInt DemandedRHS(DemandedElts); + + // TODO - add support for constant vselect masks. + + // See if we can simplify either vselect operand. + APInt UndefLHS, ZeroLHS; + APInt UndefRHS, ZeroRHS; + if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS, + ZeroLHS, TLO, Depth + 1)) + return true; + if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS, + ZeroRHS, TLO, Depth + 1)) + return true; + + KnownUndef = UndefLHS & UndefRHS; + KnownZero = ZeroLHS & ZeroRHS; + break; + } + case ISD::VECTOR_SHUFFLE: { + ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); + + // Collect demanded elements from shuffle operands.. + APInt DemandedLHS(NumElts, 0); + APInt DemandedRHS(NumElts, 0); + for (unsigned i = 0; i != NumElts; ++i) { + int M = ShuffleMask[i]; + if (M < 0 || !DemandedElts[i]) + continue; + assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range"); + if (M < (int)NumElts) + DemandedLHS.setBit(M); + else + DemandedRHS.setBit(M - NumElts); + } + + // See if we can simplify either shuffle operand. + APInt UndefLHS, ZeroLHS; + APInt UndefRHS, ZeroRHS; + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS, + ZeroLHS, TLO, Depth + 1)) + return true; + if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS, + ZeroRHS, TLO, Depth + 1)) + return true; + + // Simplify mask using undef elements from LHS/RHS. + bool Updated = false; + bool IdentityLHS = true, IdentityRHS = true; + SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end()); + for (unsigned i = 0; i != NumElts; ++i) { + int &M = NewMask[i]; + if (M < 0) + continue; + if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) || + (M >= (int)NumElts && UndefRHS[M - NumElts])) { + Updated = true; + M = -1; + } + IdentityLHS &= (M < 0) || (M == (int)i); + IdentityRHS &= (M < 0) || ((M - NumElts) == i); + } + + // Update legal shuffle masks based on demanded elements if it won't reduce + // to Identity which can cause premature removal of the shuffle mask. + if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps && + isShuffleMaskLegal(NewMask, VT)) + return TLO.CombineTo(Op, + TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0), + Op.getOperand(1), NewMask)); + + // Propagate undef/zero elements from LHS/RHS. 
+ for (unsigned i = 0; i != NumElts; ++i) { + int M = ShuffleMask[i]; + if (M < 0) { + KnownUndef.setBit(i); + } else if (M < (int)NumElts) { + if (UndefLHS[M]) + KnownUndef.setBit(i); + if (ZeroLHS[M]) + KnownZero.setBit(i); + } else { + if (UndefRHS[M - NumElts]) + KnownUndef.setBit(i); + if (ZeroRHS[M - NumElts]) + KnownZero.setBit(i); + } + } + break; + } + case ISD::ADD: + case ISD::SUB: { + APInt SrcUndef, SrcZero; + if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef, + SrcZero, TLO, Depth + 1)) + return true; + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, + KnownZero, TLO, Depth + 1)) + return true; + KnownZero &= SrcZero; + KnownUndef &= SrcUndef; + break; + } + case ISD::TRUNCATE: + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, + KnownZero, TLO, Depth + 1)) + return true; + break; + default: { + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) + if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef, + KnownZero, TLO, Depth)) + return true; + break; + } } + assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero"); return false; } @@ -1316,6 +1699,18 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, return 1; } +bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode( + SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, + TargetLoweringOpt &TLO, unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use SimplifyDemandedVectorElts if you don't know whether Op" + " is a target node!"); + return false; +} + // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must // work with truncating build vectors and vectors with elements of less than // 8 bits. @@ -1353,16 +1748,6 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const { llvm_unreachable("Invalid boolean contents"); } -SDValue TargetLowering::getConstTrueVal(SelectionDAG &DAG, EVT VT, - const SDLoc &DL) const { - unsigned ElementWidth = VT.getScalarSizeInBits(); - APInt TrueInt = - getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent - ? APInt(ElementWidth, 1) - : APInt::getAllOnesValue(ElementWidth); - return DAG.getConstant(TrueInt, DL, VT); -} - bool TargetLowering::isConstFalseVal(const SDNode *N) const { if (!N) return false; @@ -1466,6 +1851,89 @@ SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1, return SDValue(); } +/// There are multiple IR patterns that could be checking whether certain +/// truncation of a signed number would be lossy or not. The pattern which is +/// best at IR level, may not lower optimally. Thus, we want to unfold it. +/// We are looking for the following pattern: (KeptBits is a constant) +/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits) +/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false. +/// KeptBits also can't be 1, that would have been folded to %x dstcond 0 +/// We will unfold it into the natural trunc+sext pattern: +/// ((%x << C) a>> C) dstcond %x +/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x) +SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( + EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI, + const SDLoc &DL) const { + // We must be comparing with a constant. 
+ ConstantSDNode *C1; + if (!(C1 = dyn_cast<ConstantSDNode>(N1))) + return SDValue(); + + // N0 should be: add %x, (1 << (KeptBits-1)) + if (N0->getOpcode() != ISD::ADD) + return SDValue(); + + // And we must be 'add'ing a constant. + ConstantSDNode *C01; + if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1)))) + return SDValue(); + + SDValue X = N0->getOperand(0); + EVT XVT = X.getValueType(); + + // Validate constants ... + + APInt I1 = C1->getAPIntValue(); + + ISD::CondCode NewCond; + if (Cond == ISD::CondCode::SETULT) { + NewCond = ISD::CondCode::SETEQ; + } else if (Cond == ISD::CondCode::SETULE) { + NewCond = ISD::CondCode::SETEQ; + // But need to 'canonicalize' the constant. + I1 += 1; + } else if (Cond == ISD::CondCode::SETUGT) { + NewCond = ISD::CondCode::SETNE; + // But need to 'canonicalize' the constant. + I1 += 1; + } else if (Cond == ISD::CondCode::SETUGE) { + NewCond = ISD::CondCode::SETNE; + } else + return SDValue(); + + const APInt &I01 = C01->getAPIntValue(); + // Both of them must be power-of-two, and the constant from setcc is bigger. + if (!(I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2())) + return SDValue(); + + // They are power-of-two, so which bit is set? + const unsigned KeptBits = I1.logBase2(); + const unsigned KeptBitsMinusOne = I01.logBase2(); + + // Magic! + if (KeptBits != (KeptBitsMinusOne + 1)) + return SDValue(); + assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable"); + + // We don't want to do this in every single case. + SelectionDAG &DAG = DCI.DAG; + if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck( + XVT, KeptBits)) + return SDValue(); + + const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits; + assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable"); + + // Unfold into: ((%x << C) a>> C) cond %x + // Where 'cond' will be either 'eq' or 'ne'. + SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT); + SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt); + SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt); + SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond); + + return T2; +} + /// Try to simplify a setcc built with the specified operands and cc. If it is /// unable to simplify it, return a null SDValue. SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, @@ -1473,25 +1941,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAGCombinerInfo &DCI, const SDLoc &dl) const { SelectionDAG &DAG = DCI.DAG; + EVT OpVT = N0.getValueType(); // These setcc operations always fold. switch (Cond) { default: break; case ISD::SETFALSE: - case ISD::SETFALSE2: return DAG.getConstant(0, dl, VT); + case ISD::SETFALSE2: return DAG.getBoolConstant(false, dl, VT, OpVT); case ISD::SETTRUE: - case ISD::SETTRUE2: { - TargetLowering::BooleanContent Cnt = - getBooleanContents(N0->getValueType(0)); - return DAG.getConstant( - Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl, - VT); - } + case ISD::SETTRUE2: return DAG.getBoolConstant(true, dl, VT, OpVT); } // Ensure that the constant occurs on the RHS and fold constant comparisons. + // TODO: Handle non-splat vector constants. All undef causes trouble. 
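optimizeSetCCOfSignedTruncationCheck above rewrites the add-then-unsigned-compare idiom into a shl/ashr round-trip compare. A standalone sketch of the equivalence it relies on, assuming a 32-bit value, KeptBits = 8, two's-complement wrap-around, and an arithmetic right shift (the helper names below are made up for illustration):

#include <cassert>
#include <cstdint>

// Both predicates ask: does x survive truncation to KeptBits signed bits?
// Form 1 is the pattern the combine matches:
//   (add %x, (1 << (KeptBits-1))) u< (1 << KeptBits)
// Form 2 is what it unfolds to:
//   ((%x << C) a>> C) == %x, with C = bitwidth(x) - KeptBits.
static bool fitsViaAddCmp(int32_t x, unsigned KeptBits) {
  uint32_t Bias = 1u << (KeptBits - 1);
  uint32_t Limit = 1u << KeptBits;
  return (uint32_t)x + Bias < Limit;      // unsigned wrap-around intended
}

static bool fitsViaShlAshr(int32_t x, unsigned KeptBits) {
  unsigned C = 32 - KeptBits;             // MaskedBits in the code above
  // Shift as unsigned to avoid UB, then rely on arithmetic >> for the sext.
  int32_t RoundTrip = (int32_t)((uint32_t)x << C) >> C;
  return RoundTrip == x;
}

int main() {
  for (int64_t v = -70000; v <= 70000; ++v)
    assert(fitsViaAddCmp((int32_t)v, 8) == fitsViaShlAshr((int32_t)v, 8));
  return 0;
}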
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); - if (isa<ConstantSDNode>(N0.getNode()) && + if (isConstOrConstSplat(N0) && (DCI.isBeforeLegalizeOps() || isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); @@ -1737,7 +2201,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, EVT newVT = N0.getOperand(0).getValueType(); if (DCI.isBeforeLegalizeOps() || (isOperationLegal(ISD::SETCC, newVT) && - getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) { + isCondCodeLegal(Cond, newVT.getSimpleVT()))) { EVT NewSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT); SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT); @@ -1867,8 +2331,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + if (SDValue V = + optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl)) + return V; + } + + // These simplifications apply to splat vectors as well. + // TODO: Handle more splat vector cases. + if (auto *N1C = isConstOrConstSplat(N1)) { + const APInt &C1 = N1C->getAPIntValue(); + APInt MinVal, MaxVal; - unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); + unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits(); if (ISD::isSignedIntSetCC(Cond)) { MinVal = APInt::getSignedMinValue(OperandBitSize); MaxVal = APInt::getSignedMaxValue(OperandBitSize); @@ -1881,84 +2355,105 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { // X >= MIN --> true if (C1 == MinVal) - return DAG.getConstant(1, dl, VT); - - // X >= C0 --> X > (C0 - 1) - APInt C = C1 - 1; - ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; - if ((DCI.isBeforeLegalizeOps() || - isCondCodeLegal(NewCC, VT.getSimpleVT())) && - (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && - isLegalICmpImmediate(C.getSExtValue())))) { - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C, dl, N1.getValueType()), - NewCC); + return DAG.getBoolConstant(true, dl, VT, OpVT); + + if (!VT.isVector()) { // TODO: Support this for vectors. + // X >= C0 --> X > (C0 - 1) + APInt C = C1 - 1; + ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, dl, N1.getValueType()), + NewCC); + } } } if (Cond == ISD::SETLE || Cond == ISD::SETULE) { // X <= MAX --> true if (C1 == MaxVal) - return DAG.getConstant(1, dl, VT); + return DAG.getBoolConstant(true, dl, VT, OpVT); // X <= C0 --> X < (C0 + 1) - APInt C = C1 + 1; - ISD::CondCode NewCC = (Cond == ISD::SETLE) ? 
ISD::SETLT : ISD::SETULT; - if ((DCI.isBeforeLegalizeOps() || - isCondCodeLegal(NewCC, VT.getSimpleVT())) && - (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && - isLegalICmpImmediate(C.getSExtValue())))) { - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C, dl, N1.getValueType()), - NewCC); - } - } - - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) - return DAG.getConstant(0, dl, VT); // X < MIN --> false - if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) - return DAG.getConstant(1, dl, VT); // X >= MIN --> true - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) - return DAG.getConstant(0, dl, VT); // X > MAX --> false - if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) - return DAG.getConstant(1, dl, VT); // X <= MAX --> true - - // Canonicalize setgt X, Min --> setne X, Min - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); - // Canonicalize setlt X, Max --> setne X, Max - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); - - // If we have setult X, 1, turn it into seteq X, 0 - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MinVal, dl, N0.getValueType()), - ISD::SETEQ); - // If we have setugt X, Max-1, turn it into seteq X, Max - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MaxVal, dl, N0.getValueType()), - ISD::SETEQ); + if (!VT.isVector()) { // TODO: Support this for vectors. + APInt C = C1 + 1; + ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, dl, N1.getValueType()), + NewCC); + } + } + } - // If we have "setcc X, C0", check to see if we can shrink the immediate - // by changing cc. + if (Cond == ISD::SETLT || Cond == ISD::SETULT) { + if (C1 == MinVal) + return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false + + // TODO: Support this for vectors after legalize ops. + if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { + // Canonicalize setlt X, Max --> setne X, Max + if (C1 == MaxVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setult X, 1, turn it into seteq X, 0 + if (C1 == MinVal+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MinVal, dl, N0.getValueType()), + ISD::SETEQ); + } + } - // SETUGT X, SINTMAX -> SETLT X, 0 - if (Cond == ISD::SETUGT && - C1 == APInt::getSignedMaxValue(OperandBitSize)) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(0, dl, N1.getValueType()), - ISD::SETLT); + if (Cond == ISD::SETGT || Cond == ISD::SETUGT) { + if (C1 == MaxVal) + return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false + + // TODO: Support this for vectors after legalize ops. 
+ if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { + // Canonicalize setgt X, Min --> setne X, Min + if (C1 == MinVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setugt X, Max-1, turn it into seteq X, Max + if (C1 == MaxVal-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MaxVal, dl, N0.getValueType()), + ISD::SETEQ); + } + } - // SETULT X, SINTMIN -> SETGT X, -1 - if (Cond == ISD::SETULT && - C1 == APInt::getSignedMinValue(OperandBitSize)) { - SDValue ConstMinusOne = - DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, - N1.getValueType()); - return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); + // If we have "setcc X, C0", check to see if we can shrink the immediate + // by changing cc. + // TODO: Support this for vectors after legalize ops. + if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { + // SETUGT X, SINTMAX -> SETLT X, 0 + if (Cond == ISD::SETUGT && + C1 == APInt::getSignedMaxValue(OperandBitSize)) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(0, dl, N1.getValueType()), + ISD::SETLT); + + // SETULT X, SINTMIN -> SETGT X, -1 + if (Cond == ISD::SETULT && + C1 == APInt::getSignedMinValue(OperandBitSize)) { + SDValue ConstMinusOne = + DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, + N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); + } } + } + + // Back to non-vector simplifications. + // TODO: Can we do these for vector splats? + if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); // Fold bit comparisons when we can. if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && @@ -1967,9 +2462,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOpcode() == ISD::AND) { auto &DL = DAG.getDataLayout(); if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = DCI.isBeforeLegalize() - ? getPointerTy(DL) - : getShiftAmountTy(N0.getValueType(), DL); + EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, + !DCI.isBeforeLegalize()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. if (AndRHS->getAPIntValue().isPowerOf2()) { @@ -2001,9 +2495,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); auto &DL = DAG.getDataLayout(); - EVT ShiftTy = DCI.isBeforeLegalize() - ? getPointerTy(DL) - : getShiftAmountTy(N0.getValueType(), DL); + EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, + !DCI.isBeforeLegalize()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), DAG.getConstant(ShiftBits, dl, @@ -2033,9 +2526,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (ShiftBits && NewC.getMinSignedBits() <= 64 && isLegalICmpImmediate(NewC.getSExtValue())) { auto &DL = DAG.getDataLayout(); - EVT ShiftTy = DCI.isBeforeLegalize() - ? getPointerTy(DL) - : getShiftAmountTy(N0.getValueType(), DL); + EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, + !DCI.isBeforeLegalize()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, DAG.getConstant(ShiftBits, dl, ShiftTy)); @@ -2058,9 +2550,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, switch (ISD::getUnorderedFlavor(Cond)) { default: llvm_unreachable("Unknown flavor!"); case 0: // Known false. 
- return DAG.getConstant(0, dl, VT); + return DAG.getBoolConstant(false, dl, VT, OpVT); case 1: // Known true. - return DAG.getConstant(1, dl, VT); + return DAG.getBoolConstant(true, dl, VT, OpVT); case 2: // Undefined. return DAG.getUNDEF(VT); } @@ -2124,31 +2616,24 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0 == N1) { // The sext(setcc()) => setcc() optimization relies on the appropriate // constant being emitted. - uint64_t EqVal = 0; - switch (getBooleanContents(N0.getValueType())) { - case UndefinedBooleanContent: - case ZeroOrOneBooleanContent: - EqVal = ISD::isTrueWhenEqual(Cond); - break; - case ZeroOrNegativeOneBooleanContent: - EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0; - break; - } + + bool EqTrue = ISD::isTrueWhenEqual(Cond); // We can always fold X == X for integer setcc's. - if (N0.getValueType().isInteger()) { - return DAG.getConstant(EqVal, dl, VT); - } + if (N0.getValueType().isInteger()) + return DAG.getBoolConstant(EqTrue, dl, VT, OpVT); + unsigned UOF = ISD::getUnorderedFlavor(Cond); if (UOF == 2) // FP operators that are undefined on NaNs. - return DAG.getConstant(EqVal, dl, VT); - if (UOF == unsigned(ISD::isTrueWhenEqual(Cond))) - return DAG.getConstant(EqVal, dl, VT); + return DAG.getBoolConstant(EqTrue, dl, VT, OpVT); + if (UOF == unsigned(EqTrue)) + return DAG.getBoolConstant(EqTrue, dl, VT, OpVT); // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO // if it is not already. ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; - if (NewCond != Cond && (DCI.isBeforeLegalizeOps() || - getCondCodeAction(NewCond, N0.getSimpleValueType()) == Legal)) + if (NewCond != Cond && + (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCond, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N0, N1, NewCond); } @@ -2237,7 +2722,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue SH = DAG.getNode( ISD::SHL, dl, N1.getValueType(), N1, DAG.getConstant(1, dl, - getShiftAmountTy(N1.getValueType(), DL))); + getShiftAmountTy(N1.getValueType(), DL, + !DCI.isBeforeLegalize()))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); @@ -2262,7 +2748,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // X == (Z-X) --> X<<1 == Z SDValue SH = DAG.getNode( ISD::SHL, dl, N1.getValueType(), N0, - DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL))); + DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL, + !DCI.isBeforeLegalize()))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); @@ -2276,50 +2763,52 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Fold away ALL boolean setcc's. 
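The "fold away ALL boolean setcc's" switch that follows turns every i1 comparison into and/or/xor/not; the change here extends it from scalar i1 to i1 vectors and picks the extension from the target's boolean contents. The identities themselves can be checked in isolation; a small standalone sketch over the two i1 values, where a set bit sign-extends to -1 (the loop and variable names are illustrative only):

#include <cassert>

int main() {
  // x, y are i1 bits; sx, sy are their signed (sign-extended) values.
  for (int x = 0; x <= 1; ++x)
    for (int y = 0; y <= 1; ++y) {
      int sx = -x, sy = -y;
      assert((x == y)   == ((~(x ^ y)) & 1));  // SETEQ -> ~(X ^ Y)
      assert((x != y)   == ((x ^ y) & 1));     // SETNE ->   X ^ Y
      assert((sx > sy)  == ((~x & y) & 1));    // SETGT -> ~X & Y
      assert((sx < sy)  == ((~y & x) & 1));    // SETLT -> ~Y & X
      assert((sx >= sy) == ((~x | y) & 1));    // SETGE -> ~X | Y
      assert((sx <= sy) == ((~y | x) & 1));    // SETLE -> ~Y | X
    }
  return 0;
}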
SDValue Temp; - if (N0.getValueType() == MVT::i1 && foldBooleans) { + if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) { + EVT OpVT = N0.getValueType(); switch (Cond) { default: llvm_unreachable("Unknown integer setcc!"); case ISD::SETEQ: // X == Y -> ~(X^Y) - Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); - N0 = DAG.getNOT(dl, Temp, MVT::i1); + Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1); + N0 = DAG.getNOT(dl, Temp, OpVT); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(Temp.getNode()); break; case ISD::SETNE: // X != Y --> (X^Y) - N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); + N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1); break; case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y - Temp = DAG.getNOT(dl, N0, MVT::i1); - N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp); + Temp = DAG.getNOT(dl, N0, OpVT); + N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(Temp.getNode()); break; case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X - Temp = DAG.getNOT(dl, N1, MVT::i1); - N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp); + Temp = DAG.getNOT(dl, N1, OpVT); + N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(Temp.getNode()); break; case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y - Temp = DAG.getNOT(dl, N0, MVT::i1); - N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp); + Temp = DAG.getNOT(dl, N0, OpVT); + N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(Temp.getNode()); break; case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X - Temp = DAG.getNOT(dl, N1, MVT::i1); - N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp); + Temp = DAG.getNOT(dl, N1, OpVT); + N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp); break; } - if (VT != MVT::i1) { + if (VT.getScalarType() != MVT::i1) { if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(N0.getNode()); // FIXME: If running after legalize, we probably can't do this. - N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0); + ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT)); + N0 = DAG.getNode(ExtendCode, dl, VT, N0); } return N0; } @@ -2928,7 +3417,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, } } -/// \brief Given an exact SDIV by a constant, create a multiplication +/// Given an exact SDIV by a constant, create a multiplication /// with the multiplicative inverse of the constant. static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, const SDLoc &dl, SelectionDAG &DAG, @@ -2970,7 +3459,7 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, return SDValue(); } -/// \brief Given an ISD::SDIV node expressing a divide by constant, +/// Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 
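The BuildExactSDIV comment above relies on the fact that an odd constant has a multiplicative inverse modulo 2^N, so an exact signed division can be replaced by a single multiply (the non-exact case uses the "magic number" scheme from Hacker's Delight instead). A standalone sketch of the inverse trick, using plain uint32_t rather than APInt; the divisor 19 and the Newton-Raphson helper are illustrative choices, not the LLVM helper itself:

#include <cassert>
#include <cstdint>

// Multiplicative inverse of an odd d modulo 2^32 via Newton-Raphson:
// each step doubles the number of correct low bits (d*d == 1 mod 8 for
// any odd d, so the seed is already correct to 3 bits).
static uint32_t inverseMod2_32(uint32_t d) {
  assert((d & 1) && "only odd values are invertible mod 2^32");
  uint32_t x = d;
  for (int i = 0; i < 5; ++i)     // 3 -> 6 -> 12 -> 24 -> 48 correct bits
    x *= 2 - d * x;
  return x;
}

int main() {
  const int32_t d = 19;
  uint32_t Inv = inverseMod2_32((uint32_t)d);
  for (int32_t q = -1000; q <= 1000; ++q) {
    int32_t n = q * d;                          // n is an exact multiple of d
    assert((int32_t)((uint32_t)n * Inv) == q);  // multiply replaces exact sdiv
  }
  return 0;
}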
@@ -3034,7 +3523,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, return DAG.getNode(ISD::ADD, dl, VT, Q, T); } -/// \brief Given an ISD::UDIV node expressing a divide by constant, +/// Given an ISD::UDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". @@ -3413,9 +3902,6 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, return DAG.getMergeValues({ Value, NewChain }, SL); } -// FIXME: This relies on each element having a byte size, otherwise the stride -// is 0 and just overwrites the same location. ExpandStore currently expects -// this broken behavior. SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const { SDLoc SL(ST); @@ -3432,11 +3918,43 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, // The type of data as saved in memory. EVT MemSclVT = StVT.getScalarType(); - // Store Stride in bytes - unsigned Stride = MemSclVT.getSizeInBits() / 8; EVT IdxVT = getVectorIdxTy(DAG.getDataLayout()); unsigned NumElem = StVT.getVectorNumElements(); + // A vector must always be stored in memory as-is, i.e. without any padding + // between the elements, since various code depend on it, e.g. in the + // handling of a bitcast of a vector type to int, which may be done with a + // vector store followed by an integer load. A vector that does not have + // elements that are byte-sized must therefore be stored as an integer + // built out of the extracted vector elements. + if (!MemSclVT.isByteSized()) { + unsigned NumBits = StVT.getSizeInBits(); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); + + SDValue CurrVal = DAG.getConstant(0, SL, IntVT); + + for (unsigned Idx = 0; Idx < NumElem; ++Idx) { + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, + DAG.getConstant(Idx, SL, IdxVT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt); + SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc); + unsigned ShiftIntoIdx = + (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx); + SDValue ShiftAmount = + DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT); + SDValue ShiftedElt = + DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount); + CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt); + } + + return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(), + ST->getAlignment(), ST->getMemOperand()->getFlags(), + ST->getAAInfo()); + } + + // Store Stride in bytes + unsigned Stride = MemSclVT.getSizeInBits() / 8; + assert (Stride && "Zero stride!"); // Extract each of the elements from the original vector and save them into // memory individually. SmallVector<SDValue, 8> Stores; @@ -3475,6 +3993,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) { // Scalarize the load and let the individual components be handled. 
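The new scalarizeVectorStore path above packs a vector with non-byte-sized elements into one integer so the in-memory layout has no padding, and ShiftIntoIdx flips the element order on big-endian targets. A standalone sketch of that packing for a hypothetical <4 x i2> value; packVector and the sample bit patterns are illustrative, not LLVM API:

#include <cassert>
#include <cstdint>
#include <vector>

// Pack elements narrower than a byte into a single integer. Element i lands
// at bit offset i*EltBits on little-endian targets and at
// ((NumElem-1)-i)*EltBits on big-endian ones, mirroring ShiftIntoIdx above.
static uint32_t packVector(const std::vector<uint32_t> &Elts, unsigned EltBits,
                           bool BigEndian) {
  uint32_t Result = 0;
  uint32_t Mask = (1u << EltBits) - 1;          // the TRUNCATE to MemSclVT
  for (unsigned i = 0, e = Elts.size(); i != e; ++i) {
    unsigned ShiftIntoIdx = BigEndian ? (e - 1 - i) : i;
    Result |= (Elts[i] & Mask) << (ShiftIntoIdx * EltBits);
  }
  return Result;
}

int main() {
  std::vector<uint32_t> V = {1, 2, 3, 0};       // a <4 x i2> value
  assert(packVector(V, 2, /*BigEndian=*/false) == 0x39); // bits 00 11 10 01
  assert(packVector(V, 2, /*BigEndian=*/true)  == 0x6C); // bits 01 10 11 00
  return 0;
}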
SDValue Scalarized = scalarizeVectorLoad(LD, DAG); + if (Scalarized->getOpcode() == ISD::MERGE_VALUES) + return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1)); return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1)); } diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp index b35bf6ba3a7b..d3454ca6ba6a 100644 --- a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -53,6 +53,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" @@ -62,11 +63,13 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" @@ -97,7 +100,7 @@ EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, namespace { -/// \brief Class to determine where the safe point to insert the +/// Class to determine where the safe point to insert the /// prologue and epilogue are. /// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the /// shrink-wrapping term for prologue/epilogue placement, this pass @@ -128,6 +131,9 @@ class ShrinkWrap : public MachineFunctionPass { /// are in the same loop. MachineLoopInfo *MLI; + // Emit remarks. + MachineOptimizationRemarkEmitter *ORE = nullptr; + /// Frequency of the Entry block. uint64_t EntryFreq; @@ -137,6 +143,9 @@ class ShrinkWrap : public MachineFunctionPass { /// Current opcode for frame destroy. unsigned FrameDestroyOpcode; + /// Stack pointer register, used by llvm.{savestack,restorestack} + unsigned SP; + /// Entry block. const MachineBasicBlock *Entry; @@ -148,7 +157,7 @@ class ShrinkWrap : public MachineFunctionPass { /// Current MachineFunction. MachineFunction *MachineFunc; - /// \brief Check if \p MI uses or defines a callee-saved register or + /// Check if \p MI uses or defines a callee-saved register or /// a frame index. If this is the case, this means \p MI must happen /// after Save and before Restore. bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const; @@ -168,14 +177,14 @@ class ShrinkWrap : public MachineFunctionPass { return CurrentCSRs; } - /// \brief Update the Save and Restore points such that \p MBB is in + /// Update the Save and Restore points such that \p MBB is in /// the region that is dominated by Save and post-dominated by Restore /// and Save and Restore still match the safe point definition. /// Such point may not exist and Save and/or Restore may be null after /// this call. void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS); - /// \brief Initialize the pass for \p MF. + /// Initialize the pass for \p MF. 
void init(MachineFunction &MF) { RCI.runOnMachineFunction(MF); MDT = &getAnalysis<MachineDominatorTree>(); @@ -184,10 +193,13 @@ class ShrinkWrap : public MachineFunctionPass { Restore = nullptr; MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); MLI = &getAnalysis<MachineLoopInfo>(); + ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); EntryFreq = MBFI->getEntryFreq(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const TargetSubtargetInfo &Subtarget = MF.getSubtarget(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); FrameSetupOpcode = TII.getCallFrameSetupOpcode(); FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); + SP = Subtarget.getTargetLowering()->getStackPointerRegisterToSaveRestore(); Entry = &MF.front(); CurrentCSRs.clear(); MachineFunc = &MF; @@ -199,7 +211,7 @@ class ShrinkWrap : public MachineFunctionPass { /// shrink-wrapping. bool ArePointsInteresting() const { return Save != Entry && Save && Restore; } - /// \brief Check if shrink wrapping is enabled for this target and function. + /// Check if shrink wrapping is enabled for this target and function. static bool isShrinkWrapEnabled(const MachineFunction &MF); public: @@ -215,12 +227,18 @@ public: AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachinePostDominatorTree>(); AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineOptimizationRemarkEmitterPass>(); MachineFunctionPass::getAnalysisUsage(AU); } + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + StringRef getPassName() const override { return "Shrink Wrapping analysis"; } - /// \brief Perform the shrink-wrapping analysis and update + /// Perform the shrink-wrapping analysis and update /// the MachineFrameInfo attached to \p MF with the results. bool runOnMachineFunction(MachineFunction &MF) override; }; @@ -236,28 +254,34 @@ INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false) bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const { - // Ignore DBG_VALUE and other meta instructions that must not affect codegen. - if (MI.isMetaInstruction()) - return false; - if (MI.getOpcode() == FrameSetupOpcode || MI.getOpcode() == FrameDestroyOpcode) { - DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); + LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); return true; } for (const MachineOperand &MO : MI.operands()) { bool UseOrDefCSR = false; if (MO.isReg()) { + // Ignore instructions like DBG_VALUE which don't read/def the register. + if (!MO.isDef() && !MO.readsReg()) + continue; unsigned PhysReg = MO.getReg(); if (!PhysReg) continue; assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Unallocated register?!"); - UseOrDefCSR = RCI.getLastCalleeSavedAlias(PhysReg); + // The stack pointer is not normally described as a callee-saved register + // in calling convention definitions, so we need to watch for it + // separately. An SP mentioned by a call instruction, we can ignore, + // though, as it's harmless and we do not want to effectively disable tail + // calls by forcing the restore point to post-dominate them. 
+ UseOrDefCSR = (!MI.isCall() && PhysReg == SP) || + RCI.getLastCalleeSavedAlias(PhysReg); } else if (MO.isRegMask()) { // Check if this regmask clobbers any of the CSRs. for (unsigned Reg : getCurrentCSRs(RS)) { @@ -267,16 +291,17 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, } } } - if (UseOrDefCSR || MO.isFI()) { - DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI(" - << MO.isFI() << "): " << MI << '\n'); + // Skip FrameIndex operands in DBG_VALUE instructions. + if (UseOrDefCSR || (MO.isFI() && !MI.isDebugValue())) { + LLVM_DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI(" + << MO.isFI() << "): " << MI << '\n'); return true; } } return false; } -/// \brief Helper function to find the immediate (post) dominator. +/// Helper function to find the immediate (post) dominator. template <typename ListOfBBs, typename DominanceAnalysis> static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, DominanceAnalysis &Dom) { @@ -300,7 +325,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, Save = MDT->findNearestCommonDominator(Save, &MBB); if (!Save) { - DEBUG(dbgs() << "Found a block that is not reachable from Entry\n"); + LLVM_DEBUG(dbgs() << "Found a block that is not reachable from Entry\n"); return; } @@ -334,7 +359,8 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, } if (!Restore) { - DEBUG(dbgs() << "Restore point needs to be spanned on several blocks\n"); + LLVM_DEBUG( + dbgs() << "Restore point needs to be spanned on several blocks\n"); return; } @@ -413,38 +439,16 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, } } -/// Check whether the edge (\p SrcBB, \p DestBB) is a backedge according to MLI. -/// I.e., check if it exists a loop that contains SrcBB and where DestBB is the -/// loop header. -static bool isProperBackedge(const MachineLoopInfo &MLI, - const MachineBasicBlock *SrcBB, - const MachineBasicBlock *DestBB) { - for (const MachineLoop *Loop = MLI.getLoopFor(SrcBB); Loop; - Loop = Loop->getParentLoop()) { - if (Loop->getHeader() == DestBB) - return true; - } - return false; -} +static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE, + StringRef RemarkName, StringRef RemarkMessage, + const DiagnosticLocation &Loc, + const MachineBasicBlock *MBB) { + ORE->emit([&]() { + return MachineOptimizationRemarkMissed(DEBUG_TYPE, RemarkName, Loc, MBB) + << RemarkMessage; + }); -/// Check if the CFG of \p MF is irreducible. -static bool isIrreducibleCFG(const MachineFunction &MF, - const MachineLoopInfo &MLI) { - const MachineBasicBlock *Entry = &*MF.begin(); - ReversePostOrderTraversal<const MachineBasicBlock *> RPOT(Entry); - BitVector VisitedBB(MF.getNumBlockIDs()); - for (const MachineBasicBlock *MBB : RPOT) { - VisitedBB.set(MBB->getNumber()); - for (const MachineBasicBlock *SuccBB : MBB->successors()) { - if (!VisitedBB.test(SuccBB->getNumber())) - continue; - // We already visited SuccBB, thus MBB->SuccBB must be a backedge. - // Check that the head matches what we have in the loop information. - // Otherwise, we have an irreducible graph. 
- if (!isProperBackedge(MLI, MBB, SuccBB)) - return true; - } - } + LLVM_DEBUG(dbgs() << RemarkMessage << '\n'); return false; } @@ -452,19 +456,21 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF)) return false; - DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); + LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); init(MF); - if (isIrreducibleCFG(MF, *MLI)) { + ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); + if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) { // If MF is irreducible, a block may be in a loop without // MachineLoopInfo reporting it. I.e., we may use the // post-dominance property in loops, which lead to incorrect // results. Moreover, we may miss that the prologue and // epilogue are not in the same loop, leading to unbalanced // construction/deconstruction of the stack frame. - DEBUG(dbgs() << "Irreducible CFGs are not supported yet\n"); - return false; + return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG", + "Irreducible CFGs are not supported yet.", + MF.getFunction().getSubprogram(), &MF.front()); } const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); @@ -472,12 +478,28 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr); for (MachineBasicBlock &MBB : MF) { - DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName() - << '\n'); - - if (MBB.isEHFuncletEntry()) { - DEBUG(dbgs() << "EH Funclets are not supported yet.\n"); - return false; + LLVM_DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' + << MBB.getName() << '\n'); + + if (MBB.isEHFuncletEntry()) + return giveUpWithRemarks(ORE, "UnsupportedEHFunclets", + "EH Funclets are not supported yet.", + MBB.front().getDebugLoc(), &MBB); + + if (MBB.isEHPad()) { + // Push the prologue and epilogue outside of + // the region that may throw by making sure + // that all the landing pads are at least at the + // boundary of the save and restore points. + // The problem with exceptions is that the throw + // is not properly modeled and in particular, a + // basic block can jump out from the middle. + updateSaveRestorePoints(MBB, RS.get()); + if (!ArePointsInteresting()) { + LLVM_DEBUG(dbgs() << "EHPad prevents shrink-wrapping\n"); + return false; + } + continue; } for (const MachineInstr &MI : MBB) { @@ -489,7 +511,7 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { // If we are at a point where we cannot improve the placement of // save/restore instructions, just give up. if (!ArePointsInteresting()) { - DEBUG(dbgs() << "No Shrink wrap candidate found\n"); + LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n"); return false; } // No need to look for other instructions, this basic block @@ -502,20 +524,21 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { // because it means we did not encounter any frame/CSR related code. // Otherwise, we would have returned from the previous loop. 
assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!"); - DEBUG(dbgs() << "Nothing to shrink-wrap\n"); + LLVM_DEBUG(dbgs() << "Nothing to shrink-wrap\n"); return false; } - DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq - << '\n'); + LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq + << '\n'); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); do { - DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: " - << Save->getNumber() << ' ' << Save->getName() << ' ' - << MBFI->getBlockFreq(Save).getFrequency() << "\nRestore: " - << Restore->getNumber() << ' ' << Restore->getName() << ' ' - << MBFI->getBlockFreq(Restore).getFrequency() << '\n'); + LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: " + << Save->getNumber() << ' ' << Save->getName() << ' ' + << MBFI->getBlockFreq(Save).getFrequency() + << "\nRestore: " << Restore->getNumber() << ' ' + << Restore->getName() << ' ' + << MBFI->getBlockFreq(Restore).getFrequency() << '\n'); bool IsSaveCheap, TargetCanUseSaveAsPrologue = false; if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save).getFrequency()) && @@ -523,7 +546,8 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { ((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) && TFI->canUseAsEpilogue(*Restore))) break; - DEBUG(dbgs() << "New points are too expensive or invalid for the target\n"); + LLVM_DEBUG( + dbgs() << "New points are too expensive or invalid for the target\n"); MachineBasicBlock *NewBB; if (!IsSaveCheap || !TargetCanUseSaveAsPrologue) { Save = FindIDom<>(*Save, Save->predecessors(), *MDT); @@ -545,9 +569,10 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { return false; } - DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " << Save->getNumber() - << ' ' << Save->getName() << "\nRestore: " - << Restore->getNumber() << ' ' << Restore->getName() << '\n'); + LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " + << Save->getNumber() << ' ' << Save->getName() + << "\nRestore: " << Restore->getNumber() << ' ' + << Restore->getName() << '\n'); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setSavePoint(Save); diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp index 17a3a84ecda5..5d2669f5ae92 100644 --- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -27,7 +28,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "sjljehprepare" @@ -64,7 +64,6 @@ public: private: bool setupEntryBlockAndCallSites(Function &F); - bool undoSwiftErrorSelect(Function &F); void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal); Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst *> LPads); void lowerIncomingArguments(Function &F); @@ -233,6 +232,13 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { assert(AfterAllocaInsPt != F.front().end()); for (auto &AI : F.args()) { + // Swift error really is a register that we model as memory -- instruction + // selection will perform mem-to-reg for us and spill/reload appropriately 
+ // around calls that clobber it. There is no need to spill this + // value to the stack and doing so would not be allowed. + if (AI.isSwiftError()) + continue; + Type *Ty = AI.getType(); // Use 'select i8 true, %arg, undef' to simulate a 'no-op' instruction. @@ -301,8 +307,8 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, for (InvokeInst *Invoke : Invokes) { BasicBlock *UnwindBlock = Invoke->getUnwindDest(); if (UnwindBlock != &BB && LiveBBs.count(UnwindBlock)) { - DEBUG(dbgs() << "SJLJ Spill: " << Inst << " around " - << UnwindBlock->getName() << "\n"); + LLVM_DEBUG(dbgs() << "SJLJ Spill: " << Inst << " around " + << UnwindBlock->getName() << "\n"); NeedsSpill = true; break; } @@ -462,25 +468,6 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { return true; } -bool SjLjEHPrepare::undoSwiftErrorSelect(Function &F) { - // We have inserted dummy copies 'select true, arg, undef' in the entry block - // for arguments to simplify this pass. - // swifterror arguments cannot be used in this way. Undo the select for the - // swifterror argument. - for (auto &AI : F.args()) { - if (AI.isSwiftError()) { - assert(AI.hasOneUse() && "Must have converted the argument to a select"); - auto *Select = dyn_cast<SelectInst>(AI.use_begin()->getUser()); - assert(Select && "There must be single select user"); - auto *OrigSwiftError = cast<Argument>(Select->getTrueValue()); - Select->replaceAllUsesWith(OrigSwiftError); - Select->eraseFromParent(); - return true; - } - } - return false; -} - bool SjLjEHPrepare::runOnFunction(Function &F) { Module &M = *F.getParent(); RegisterFn = M.getOrInsertFunction( @@ -499,7 +486,5 @@ bool SjLjEHPrepare::runOnFunction(Function &F) { FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext); bool Res = setupEntryBlockAndCallSites(F); - if (Res) - Res |= undoSwiftErrorSelect(F); return Res; } diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp index ea74c777e1e2..ed74b3e4fa19 100644 --- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp @@ -10,6 +10,7 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Config/llvm-config.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -73,7 +74,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { SlotIndex blockStartIndex(&indexList.back(), SlotIndex::Slot_Block); for (MachineInstr &MI : MBB) { - if (MI.isDebugValue()) + if (MI.isDebugInstr()) continue; // Insert a store index for the instr. @@ -94,9 +95,9 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { } // Sort the Idx2MBBMap - std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); + llvm::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); - DEBUG(mf->print(dbgs(), this)); + LLVM_DEBUG(mf->print(dbgs(), this)); // And we're done! return false; @@ -146,7 +147,7 @@ void SlotIndexes::removeSingleMachineInstrFromMaps(MachineInstr &MI) { void SlotIndexes::renumberIndexes() { // Renumber updates the index of every element of the index list. - DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n"); + LLVM_DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n"); ++NumGlobalRenum; unsigned index = 0; @@ -173,8 +174,8 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { // If the next index is bigger, we have caught up. 
} while (curItr != indexList.end() && curItr->getIndex() <= index); - DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << startItr->getIndex() << '-' - << index << " ***\n"); + LLVM_DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << startItr->getIndex() + << '-' << index << " ***\n"); ++NumLocalRenum; } @@ -244,7 +245,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, for (MachineBasicBlock::iterator I = End; I != Begin;) { --I; MachineInstr &MI = *I; - if (!MI.isDebugValue() && mi2iMap.find(&MI) == mi2iMap.end()) + if (!MI.isDebugInstr() && mi2iMap.find(&MI) == mi2iMap.end()) insertMachineInstrInMaps(MI); } } diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp index b989b54d4190..f6786b30b21c 100644 --- a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp @@ -246,7 +246,7 @@ void SpillPlacement::activate(unsigned n) { } } -/// \brief Set the threshold for a given entry frequency. +/// Set the threshold for a given entry frequency. /// /// Set the threshold relative to \c Entry. Since the threshold is used as a /// bound on the open interval (-Threshold;Threshold), 1 is the minimum diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp index 1628ee28b8a3..d639f4475301 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp @@ -39,6 +39,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Support/Allocator.h" @@ -191,7 +192,7 @@ void SplitAnalysis::analyzeUses() { // I am looking at you, RegisterCoalescer! DidRepairRange = true; ++NumRepairs; - DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n"); + LLVM_DEBUG(dbgs() << "*** Fixing inconsistent live interval! 
***\n"); const_cast<LiveIntervals&>(LIS) .shrinkToUses(const_cast<LiveInterval*>(CurLI)); UseBlocks.clear(); @@ -201,10 +202,9 @@ void SplitAnalysis::analyzeUses() { assert(fixed && "Couldn't fix broken live interval"); } - DEBUG(dbgs() << "Analyze counted " - << UseSlots.size() << " instrs in " - << UseBlocks.size() << " blocks, through " - << NumThroughBlocks << " blocks.\n"); + LLVM_DEBUG(dbgs() << "Analyze counted " << UseSlots.size() << " instrs in " + << UseBlocks.size() << " blocks, through " + << NumThroughBlocks << " blocks.\n"); } /// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks @@ -685,20 +685,20 @@ unsigned SplitEditor::openIntv() { void SplitEditor::selectIntv(unsigned Idx) { assert(Idx != 0 && "Cannot select the complement interval"); assert(Idx < Edit->size() && "Can only select previously opened interval"); - DEBUG(dbgs() << " selectIntv " << OpenIdx << " -> " << Idx << '\n'); + LLVM_DEBUG(dbgs() << " selectIntv " << OpenIdx << " -> " << Idx << '\n'); OpenIdx = Idx; } SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) { assert(OpenIdx && "openIntv not called before enterIntvBefore"); - DEBUG(dbgs() << " enterIntvBefore " << Idx); + LLVM_DEBUG(dbgs() << " enterIntvBefore " << Idx); Idx = Idx.getBaseIndex(); VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); if (!ParentVNI) { - DEBUG(dbgs() << ": not live\n"); + LLVM_DEBUG(dbgs() << ": not live\n"); return Idx; } - DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); MachineInstr *MI = LIS.getInstructionFromIndex(Idx); assert(MI && "enterIntvBefore called with invalid index"); @@ -708,14 +708,14 @@ SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) { SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) { assert(OpenIdx && "openIntv not called before enterIntvAfter"); - DEBUG(dbgs() << " enterIntvAfter " << Idx); + LLVM_DEBUG(dbgs() << " enterIntvAfter " << Idx); Idx = Idx.getBoundaryIndex(); VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); if (!ParentVNI) { - DEBUG(dbgs() << ": not live\n"); + LLVM_DEBUG(dbgs() << ": not live\n"); return Idx; } - DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); MachineInstr *MI = LIS.getInstructionFromIndex(Idx); assert(MI && "enterIntvAfter called with invalid index"); @@ -728,18 +728,18 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { assert(OpenIdx && "openIntv not called before enterIntvAtEnd"); SlotIndex End = LIS.getMBBEndIdx(&MBB); SlotIndex Last = End.getPrevSlot(); - DEBUG(dbgs() << " enterIntvAtEnd " << printMBBReference(MBB) << ", " - << Last); + LLVM_DEBUG(dbgs() << " enterIntvAtEnd " << printMBBReference(MBB) << ", " + << Last); VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last); if (!ParentVNI) { - DEBUG(dbgs() << ": not live\n"); + LLVM_DEBUG(dbgs() << ": not live\n"); return End; } - DEBUG(dbgs() << ": valno " << ParentVNI->id); + LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id); VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB, SA.getLastSplitPointIter(&MBB)); RegAssign.insert(VNI->def, End, OpenIdx); - DEBUG(dump()); + LLVM_DEBUG(dump()); return VNI->def; } @@ -750,23 +750,23 @@ void SplitEditor::useIntv(const MachineBasicBlock &MBB) { void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) { assert(OpenIdx && "openIntv not called before useIntv"); - DEBUG(dbgs() << " useIntv [" << Start << ';' << End << "):"); + LLVM_DEBUG(dbgs() << " useIntv [" << Start << ';' << 
End << "):"); RegAssign.insert(Start, End, OpenIdx); - DEBUG(dump()); + LLVM_DEBUG(dump()); } SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) { assert(OpenIdx && "openIntv not called before leaveIntvAfter"); - DEBUG(dbgs() << " leaveIntvAfter " << Idx); + LLVM_DEBUG(dbgs() << " leaveIntvAfter " << Idx); // The interval must be live beyond the instruction at Idx. SlotIndex Boundary = Idx.getBoundaryIndex(); VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Boundary); if (!ParentVNI) { - DEBUG(dbgs() << ": not live\n"); + LLVM_DEBUG(dbgs() << ": not live\n"); return Boundary.getNextSlot(); } - DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); MachineInstr *MI = LIS.getInstructionFromIndex(Boundary); assert(MI && "No instruction at index"); @@ -788,16 +788,16 @@ SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) { SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) { assert(OpenIdx && "openIntv not called before leaveIntvBefore"); - DEBUG(dbgs() << " leaveIntvBefore " << Idx); + LLVM_DEBUG(dbgs() << " leaveIntvBefore " << Idx); // The interval must be live into the instruction at Idx. Idx = Idx.getBaseIndex(); VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); if (!ParentVNI) { - DEBUG(dbgs() << ": not live\n"); + LLVM_DEBUG(dbgs() << ": not live\n"); return Idx.getNextSlot(); } - DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); MachineInstr *MI = LIS.getInstructionFromIndex(Idx); assert(MI && "No instruction at index"); @@ -808,19 +808,19 @@ SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) { SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { assert(OpenIdx && "openIntv not called before leaveIntvAtTop"); SlotIndex Start = LIS.getMBBStartIdx(&MBB); - DEBUG(dbgs() << " leaveIntvAtTop " << printMBBReference(MBB) << ", " - << Start); + LLVM_DEBUG(dbgs() << " leaveIntvAtTop " << printMBBReference(MBB) << ", " + << Start); VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); if (!ParentVNI) { - DEBUG(dbgs() << ": not live\n"); + LLVM_DEBUG(dbgs() << ": not live\n"); return Start; } VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB, MBB.SkipPHIsLabelsAndDebug(MBB.begin())); RegAssign.insert(Start, VNI->def, OpenIdx); - DEBUG(dump()); + LLVM_DEBUG(dump()); return VNI->def; } @@ -835,9 +835,9 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { // The complement interval will be extended as needed by LRCalc.extend(). 
if (ParentVNI) forceRecompute(0, *ParentVNI); - DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):"); + LLVM_DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):"); RegAssign.insert(Start, End, OpenIdx); - DEBUG(dump()); + LLVM_DEBUG(dump()); } //===----------------------------------------------------------------------===// @@ -846,7 +846,7 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { LiveInterval *LI = &LIS.getInterval(Edit->get(0)); - DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n"); + LLVM_DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n"); RegAssignMap::iterator AssignI; AssignI.setMap(RegAssign); @@ -859,9 +859,9 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { MachineBasicBlock::iterator MBBI(MI); bool AtBegin; do AtBegin = MBBI == MBB->begin(); - while (!AtBegin && (--MBBI)->isDebugValue()); + while (!AtBegin && (--MBBI)->isDebugInstr()); - DEBUG(dbgs() << "Removing " << Def << '\t' << *MI); + LLVM_DEBUG(dbgs() << "Removing " << Def << '\t' << *MI); LIS.removeVRegDefAt(*LI, Def); LIS.RemoveMachineInstrFromMaps(*MI); MI->eraseFromParent(); @@ -876,11 +876,12 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { continue; unsigned RegIdx = AssignI.value(); if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg())) { - DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n'); + LLVM_DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx + << '\n'); forceRecompute(RegIdx, *Edit->getParent().getVNInfoAt(Def)); } else { SlotIndex Kill = LIS.getInstructionIndex(*MBBI).getRegSlot(); - DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI); + LLVM_DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI); AssignI.setStop(Kill); } } @@ -907,15 +908,17 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB, // MBB isn't in a loop, it doesn't get any better. All dominators have a // higher frequency by definition. if (!Loop) { - DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates " - << printMBBReference(*MBB) << " at depth 0\n"); + LLVM_DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) + << " dominates " << printMBBReference(*MBB) + << " at depth 0\n"); return MBB; } // We'll never be able to exit the DefLoop. if (Loop == DefLoop) { - DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates " - << printMBBReference(*MBB) << " in the same loop\n"); + LLVM_DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) + << " dominates " << printMBBReference(*MBB) + << " in the same loop\n"); return MBB; } @@ -924,8 +927,9 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB, if (Depth < BestDepth) { BestMBB = MBB; BestDepth = Depth; - DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates " - << printMBBReference(*MBB) << " at depth " << Depth << '\n'); + LLVM_DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) + << " dominates " << printMBBReference(*MBB) + << " at depth " << Depth << '\n'); } // Leave loop by going to the immediate dominator of the loop header. @@ -1031,14 +1035,14 @@ void SplitEditor::hoistCopies() { // instruction in the complement range. All other copies of ParentVNI // should be eliminated. 
if (VNI->def == ParentVNI->def) { - DEBUG(dbgs() << "Direct complement def at " << VNI->def << '\n'); + LLVM_DEBUG(dbgs() << "Direct complement def at " << VNI->def << '\n'); Dom = DomPair(ValMBB, VNI->def); continue; } // Skip the singly mapped values. There is nothing to gain from hoisting a // single back-copy. if (Values.lookup(std::make_pair(0, ParentVNI->id)).getPointer()) { - DEBUG(dbgs() << "Single complement def at " << VNI->def << '\n'); + LLVM_DEBUG(dbgs() << "Single complement def at " << VNI->def << '\n'); continue; } @@ -1062,10 +1066,11 @@ void SplitEditor::hoistCopies() { Costs[ParentVNI->id] += MBFI.getBlockFreq(ValMBB); } - DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def - << " for parent " << ParentVNI->id << '@' << ParentVNI->def - << " hoist to " << printMBBReference(*Dom.first) << ' ' - << Dom.second << '\n'); + LLVM_DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' + << VNI->def << " for parent " << ParentVNI->id << '@' + << ParentVNI->def << " hoist to " + << printMBBReference(*Dom.first) << ' ' << Dom.second + << '\n'); } // Insert the hoisted copies. @@ -1118,7 +1123,7 @@ bool SplitEditor::transferValues() { bool Skipped = false; RegAssignMap::const_iterator AssignI = RegAssign.begin(); for (const LiveRange::Segment &S : Edit->getParent()) { - DEBUG(dbgs() << " blit " << S << ':'); + LLVM_DEBUG(dbgs() << " blit " << S << ':'); VNInfo *ParentVNI = S.valno; // RegAssign has holes where RegIdx 0 should be used. SlotIndex Start = S.start; @@ -1140,14 +1145,14 @@ bool SplitEditor::transferValues() { } // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI. - DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx - << '(' << printReg(Edit->get(RegIdx)) << ')'); + LLVM_DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx << '(' + << printReg(Edit->get(RegIdx)) << ')'); LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); // Check for a simply defined value that can be blitted directly. ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id)); if (VNInfo *VNI = VFP.getPointer()) { - DEBUG(dbgs() << ':' << VNI->id); + LLVM_DEBUG(dbgs() << ':' << VNI->id); LI.addSegment(LiveInterval::Segment(Start, End, VNI)); Start = End; continue; @@ -1155,7 +1160,7 @@ bool SplitEditor::transferValues() { // Skip values with forced recomputation. if (VFP.getInt()) { - DEBUG(dbgs() << "(recalc)"); + LLVM_DEBUG(dbgs() << "(recalc)"); Skipped = true; Start = End; continue; @@ -1174,7 +1179,7 @@ bool SplitEditor::transferValues() { if (Start != BlockStart) { VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped value"); - DEBUG(dbgs() << ':' << VNI->id << "*" << printMBBReference(*MBB)); + LLVM_DEBUG(dbgs() << ':' << VNI->id << "*" << printMBBReference(*MBB)); // MBB has its own def. Is it also live-out? if (BlockEnd <= End) LRC.setLiveOutValue(&*MBB, VNI); @@ -1187,7 +1192,7 @@ bool SplitEditor::transferValues() { // Handle the live-in blocks covered by [Start;End). assert(Start <= BlockStart && "Expected live-in block"); while (BlockStart < End) { - DEBUG(dbgs() << ">" << printMBBReference(*MBB)); + LLVM_DEBUG(dbgs() << ">" << printMBBReference(*MBB)); BlockEnd = LIS.getMBBEndIdx(&*MBB); if (BlockStart == ParentVNI->def) { // This block has the def of a parent PHI, so it isn't live-in. 
@@ -1212,7 +1217,7 @@ bool SplitEditor::transferValues() { } Start = End; } while (Start != S.end); - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); } LRCalc[0].calculateValues(); @@ -1314,7 +1319,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { ++RI; // LiveDebugVariables should have handled all DBG_VALUE instructions. if (MI->isDebugValue()) { - DEBUG(dbgs() << "Zapping " << *MI); + LLVM_DEBUG(dbgs() << "Zapping " << *MI); MO.setReg(0); continue; } @@ -1330,8 +1335,8 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { unsigned RegIdx = RegAssign.lookup(Idx); LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); MO.setReg(LI.reg); - DEBUG(dbgs() << " rewr " << printMBBReference(*MI->getParent()) << '\t' - << Idx << ':' << RegIdx << '\t' << *MI); + LLVM_DEBUG(dbgs() << " rewr " << printMBBReference(*MI->getParent()) + << '\t' << Idx << ':' << RegIdx << '\t' << *MI); // Extend liveness to Idx if the instruction reads reg. if (!ExtendRanges || MO.isUndef()) @@ -1416,7 +1421,7 @@ void SplitEditor::deleteRematVictims() { if (!MI->allDefsAreDead()) continue; - DEBUG(dbgs() << "All defs dead: " << *MI); + LLVM_DEBUG(dbgs() << "All defs dead: " << *MI); Dead.push_back(MI); } } @@ -1598,9 +1603,9 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, SlotIndex Start, Stop; std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum); - DEBUG(dbgs() << "%bb." << MBBNum << " [" << Start << ';' << Stop << ") intf " - << LeaveBefore << '-' << EnterAfter << ", live-through " - << IntvIn << " -> " << IntvOut); + LLVM_DEBUG(dbgs() << "%bb." << MBBNum << " [" << Start << ';' << Stop + << ") intf " << LeaveBefore << '-' << EnterAfter + << ", live-through " << IntvIn << " -> " << IntvOut); assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks"); @@ -1611,7 +1616,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum); if (!IntvOut) { - DEBUG(dbgs() << ", spill on entry.\n"); + LLVM_DEBUG(dbgs() << ", spill on entry.\n"); // // <<<<<<<<< Possible LeaveBefore interference. // |-----------| Live through. @@ -1625,7 +1630,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, } if (!IntvIn) { - DEBUG(dbgs() << ", reload on exit.\n"); + LLVM_DEBUG(dbgs() << ", reload on exit.\n"); // // >>>>>>> Possible EnterAfter interference. // |-----------| Live through. @@ -1639,7 +1644,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, } if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter) { - DEBUG(dbgs() << ", straight through.\n"); + LLVM_DEBUG(dbgs() << ", straight through.\n"); // // |-----------| Live through. // ------------- Straight through, same intv, no interference. @@ -1655,7 +1660,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter || LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) { - DEBUG(dbgs() << ", switch avoiding interference.\n"); + LLVM_DEBUG(dbgs() << ", switch avoiding interference.\n"); // // >>>> <<<< Non-overlapping EnterAfter/LeaveBefore interference. // |-----------| Live through. @@ -1676,7 +1681,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, return; } - DEBUG(dbgs() << ", create local intv for interference.\n"); + LLVM_DEBUG(dbgs() << ", create local intv for interference.\n"); // // >>><><><><<<< Overlapping EnterAfter/LeaveBefore interference. // |-----------| Live through. 
@@ -1700,17 +1705,18 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, SlotIndex Start, Stop; std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); - DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' << Stop - << "), uses " << BI.FirstInstr << '-' << BI.LastInstr - << ", reg-in " << IntvIn << ", leave before " << LeaveBefore - << (BI.LiveOut ? ", stack-out" : ", killed in block")); + LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' + << Stop << "), uses " << BI.FirstInstr << '-' + << BI.LastInstr << ", reg-in " << IntvIn + << ", leave before " << LeaveBefore + << (BI.LiveOut ? ", stack-out" : ", killed in block")); assert(IntvIn && "Must have register in"); assert(BI.LiveIn && "Must be live-in"); assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference"); if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastInstr)) { - DEBUG(dbgs() << " before interference.\n"); + LLVM_DEBUG(dbgs() << " before interference.\n"); // // <<< Interference after kill. // |---o---x | Killed in block. @@ -1735,13 +1741,13 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, // \_____ Stack interval is live-out. // if (BI.LastInstr < LSP) { - DEBUG(dbgs() << ", spill after last use before interference.\n"); + LLVM_DEBUG(dbgs() << ", spill after last use before interference.\n"); selectIntv(IntvIn); SlotIndex Idx = leaveIntvAfter(BI.LastInstr); useIntv(Start, Idx); assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); } else { - DEBUG(dbgs() << ", spill before last split point.\n"); + LLVM_DEBUG(dbgs() << ", spill before last split point.\n"); selectIntv(IntvIn); SlotIndex Idx = leaveIntvBefore(LSP); overlapIntv(Idx, BI.LastInstr); @@ -1756,7 +1762,7 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, // different register. unsigned LocalIntv = openIntv(); (void)LocalIntv; - DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n"); + LLVM_DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n"); if (!BI.LiveOut || BI.LastInstr < LSP) { // @@ -1792,10 +1798,11 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, SlotIndex Start, Stop; std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); - DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' << Stop - << "), uses " << BI.FirstInstr << '-' << BI.LastInstr - << ", reg-out " << IntvOut << ", enter after " << EnterAfter - << (BI.LiveIn ? ", stack-in" : ", defined in block")); + LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' + << Stop << "), uses " << BI.FirstInstr << '-' + << BI.LastInstr << ", reg-out " << IntvOut + << ", enter after " << EnterAfter + << (BI.LiveIn ? ", stack-in" : ", defined in block")); SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber()); @@ -1804,7 +1811,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, assert((!EnterAfter || EnterAfter < LSP) && "Bad interference"); if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstInstr)) { - DEBUG(dbgs() << " after interference.\n"); + LLVM_DEBUG(dbgs() << " after interference.\n"); // // >>>> Interference before def. // | o---o---| Defined in block. 
@@ -1816,7 +1823,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, } if (!EnterAfter || EnterAfter < BI.FirstInstr.getBaseIndex()) { - DEBUG(dbgs() << ", reload after interference.\n"); + LLVM_DEBUG(dbgs() << ", reload after interference.\n"); // // >>>> Interference before def. // |---o---o---| Live-through, stack-in. @@ -1832,7 +1839,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, // The interference is overlapping somewhere we wanted to use IntvOut. That // means we need to create a local interval that can be allocated a // different register. - DEBUG(dbgs() << ", interference overlaps uses.\n"); + LLVM_DEBUG(dbgs() << ", interference overlaps uses.\n"); // // >>>>>>> Interference overlapping uses. // |---o---o---| Live-through, stack-in. diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h index 2dafaf587801..ed664e4f81a3 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.h +++ b/contrib/llvm/lib/CodeGen/SplitKit.h @@ -421,7 +421,7 @@ private: SlotIndex buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg, MachineBasicBlock &MB, MachineBasicBlock::iterator InsertBefore, - unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex PrevCopy); + unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def); public: /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp index 608845498b48..81a41970f9e2 100644 --- a/contrib/llvm/lib/CodeGen/StackColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp @@ -39,9 +39,9 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" @@ -422,9 +422,6 @@ class StackColoring : public MachineFunctionPass { /// SlotIndex analysis object. SlotIndexes *Indexes; - /// The stack protector object. - StackProtector *SP; - /// The list of lifetime markers found. These markers are to be removed /// once the coloring is done. SmallVector<MachineInstr*, 8> Markers; @@ -448,7 +445,7 @@ public: } void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnMachineFunction(MachineFunction &MF) override; + bool runOnMachineFunction(MachineFunction &Func) override; private: /// Used in collectMarkers @@ -523,13 +520,11 @@ char &llvm::StackColoringID = StackColoring::ID; INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE, "Merge disjoint stack slots", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(StackProtector) INITIALIZE_PASS_END(StackColoring, DEBUG_TYPE, "Merge disjoint stack slots", false, false) void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<SlotIndexes>(); - AU.addRequired<StackProtector>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -600,12 +595,12 @@ bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI, isStart = false; return true; } - if (! applyFirstUse(Slot)) { + if (!applyFirstUse(Slot)) { isStart = true; return true; } } else if (LifetimeStartOnFirstUse && !ProtectFromEscapedAllocas) { - if (! 
MI.isDebugValue()) { + if (!MI.isDebugInstr()) { bool found = false; for (const MachineOperand &MO : MI.operands()) { if (!MO.isFI()) @@ -672,13 +667,13 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { } const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); if (Allocation) { - DEBUG(dbgs() << "Found a lifetime "); - DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START - ? "start" - : "end")); - DEBUG(dbgs() << " marker for slot #" << Slot); - DEBUG(dbgs() << " with allocation: " << Allocation->getName() - << "\n"); + LLVM_DEBUG(dbgs() << "Found a lifetime "); + LLVM_DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START + ? "start" + : "end")); + LLVM_DEBUG(dbgs() << " marker for slot #" << Slot); + LLVM_DEBUG(dbgs() + << " with allocation: " << Allocation->getName() << "\n"); } Markers.push_back(&MI); MarkersFound += 1; @@ -707,7 +702,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { for (unsigned slot = 0; slot < NumSlot; ++slot) if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1) ConservativeSlots.set(slot); - DEBUG(dumpBV("Conservative slots", ConservativeSlots)); + LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots)); // Step 2: compute begin/end sets for each block @@ -738,14 +733,16 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { BlockInfo.End.set(Slot); } else { for (auto Slot : slots) { - DEBUG(dbgs() << "Found a use of slot #" << Slot); - DEBUG(dbgs() << " at " << printMBBReference(*MBB) << " index "); - DEBUG(Indexes->getInstructionIndex(MI).print(dbgs())); + LLVM_DEBUG(dbgs() << "Found a use of slot #" << Slot); + LLVM_DEBUG(dbgs() + << " at " << printMBBReference(*MBB) << " index "); + LLVM_DEBUG(Indexes->getInstructionIndex(MI).print(dbgs())); const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); if (Allocation) { - DEBUG(dbgs() << " with allocation: "<< Allocation->getName()); + LLVM_DEBUG(dbgs() + << " with allocation: " << Allocation->getName()); } - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "\n"); if (BlockInfo.End.test(Slot)) { BlockInfo.End.reset(Slot); } @@ -779,8 +776,11 @@ void StackColoring::calculateLocalLiveness() { for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), PE = BB->pred_end(); PI != PE; ++PI) { LivenessMap::const_iterator I = BlockLiveness.find(*PI); - assert(I != BlockLiveness.end() && "Predecessor not found"); - LocalLiveIn |= I->second.LiveOut; + // PR37130: transformations prior to stack coloring can + // sometimes leave behind statically unreachable blocks; these + // can be safely skipped here. 
+ if (I != BlockLiveness.end()) + LocalLiveIn |= I->second.LiveOut; } // Compute LiveOut by subtracting out lifetimes that end in this @@ -880,7 +880,7 @@ bool StackColoring::removeAllMarkers() { } Markers.clear(); - DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n"); + LLVM_DEBUG(dbgs() << "Removed " << Count << " markers.\n"); return Count; } @@ -894,8 +894,8 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { if (!VI.Var) continue; if (SlotRemap.count(VI.Slot)) { - DEBUG(dbgs() << "Remapping debug info for [" - << cast<DILocalVariable>(VI.Var)->getName() << "].\n"); + LLVM_DEBUG(dbgs() << "Remapping debug info for [" + << cast<DILocalVariable>(VI.Var)->getName() << "].\n"); VI.Slot = SlotRemap[VI.Slot]; FixedDbg++; } @@ -930,9 +930,17 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { MergedAllocas.insert(From); MergedAllocas.insert(To); - // Allow the stack protector to adjust its value map to account for the - // upcoming replacement. - SP->adjustForColoring(From, To); + // Transfer the stack protector layout tag, but make sure that SSPLK_AddrOf + // does not overwrite SSPLK_SmallArray or SSPLK_LargeArray, and make sure + // that SSPLK_SmallArray does not overwrite SSPLK_LargeArray. + MachineFrameInfo::SSPLayoutKind FromKind + = MFI->getObjectSSPLayout(SI.first); + MachineFrameInfo::SSPLayoutKind ToKind = MFI->getObjectSSPLayout(SI.second); + if (FromKind != MachineFrameInfo::SSPLK_None && + (ToKind == MachineFrameInfo::SSPLK_None || + (ToKind != MachineFrameInfo::SSPLK_LargeArray && + FromKind != MachineFrameInfo::SSPLK_AddrOf))) + MFI->setObjectSSPLayout(SI.second, FromKind); // The new alloca might not be valid in a llvm.dbg.declare for this // variable, so undef out the use to make the verifier happy. @@ -993,13 +1001,13 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { // the calculated range then it means that the alloca usage moved // outside of the lifetime markers, or that the user has a bug. // NOTE: Alloca address calculations which happen outside the lifetime - // zone are are okay, despite the fact that we don't have a good way + // zone are okay, despite the fact that we don't have a good way // for validating all of the usages of the calculation. #ifndef NDEBUG bool TouchesMemory = I.mayLoad() || I.mayStore(); // If we *don't* protect the user from escaped allocas, don't bother // validating the instructions. 
- if (!I.isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) { + if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) { SlotIndex Index = Indexes->getInstructionIndex(I); const LiveInterval *Interval = &*Intervals[FromSlot]; assert(Interval->find(Index) != Interval->end() && @@ -1064,16 +1072,16 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { SlotRemap.count(H.CatchObj.FrameIndex)) H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex]; - DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n"); - DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n"); - DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n"); + LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n"); + LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n"); + LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n"); } void StackColoring::removeInvalidSlotRanges() { for (MachineBasicBlock &BB : *MF) for (MachineInstr &I : BB) { if (I.getOpcode() == TargetOpcode::LIFETIME_START || - I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugValue()) + I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugInstr()) continue; // Some intervals are suspicious! In some cases we find address @@ -1104,7 +1112,7 @@ void StackColoring::removeInvalidSlotRanges() { SlotIndex Index = Indexes->getInstructionIndex(I); if (Interval->find(Index) == Interval->end()) { Interval->clear(); - DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n"); + LLVM_DEBUG(dbgs() << "Invalidating range #" << Slot << "\n"); EscapedAllocas++; } } @@ -1128,12 +1136,11 @@ void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap, } bool StackColoring::runOnMachineFunction(MachineFunction &Func) { - DEBUG(dbgs() << "********** Stack Coloring **********\n" - << "********** Function: " << Func.getName() << '\n'); + LLVM_DEBUG(dbgs() << "********** Stack Coloring **********\n" + << "********** Function: " << Func.getName() << '\n'); MF = &Func; MFI = &MF->getFrameInfo(); Indexes = &getAnalysis<SlotIndexes>(); - SP = &getAnalysis<StackProtector>(); BlockLiveness.clear(); BasicBlocks.clear(); BasicBlockNumbering.clear(); @@ -1156,21 +1163,23 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { unsigned NumMarkers = collectMarkers(NumSlots); unsigned TotalSize = 0; - DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n"); - DEBUG(dbgs()<<"Slot structure:\n"); + LLVM_DEBUG(dbgs() << "Found " << NumMarkers << " markers and " << NumSlots + << " slots\n"); + LLVM_DEBUG(dbgs() << "Slot structure:\n"); for (int i=0; i < MFI->getObjectIndexEnd(); ++i) { - DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n"); + LLVM_DEBUG(dbgs() << "Slot #" << i << " - " << MFI->getObjectSize(i) + << " bytes.\n"); TotalSize += MFI->getObjectSize(i); } - DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n"); + LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize << " bytes\n\n"); // Don't continue because there are not enough lifetime markers, or the // stack is too small, or we are told not to optimize the slots. if (NumMarkers < 2 || TotalSize < 16 || DisableColoring || skipFunction(Func.getFunction())) { - DEBUG(dbgs()<<"Will not try to merge slots.\n"); + LLVM_DEBUG(dbgs() << "Will not try to merge slots.\n"); return removeAllMarkers(); } @@ -1183,12 +1192,12 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // Calculate the liveness of each block. 
calculateLocalLiveness(); - DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n"); - DEBUG(dump()); + LLVM_DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n"); + LLVM_DEBUG(dump()); // Propagate the liveness information. calculateLiveIntervals(NumSlots); - DEBUG(dumpIntervals()); + LLVM_DEBUG(dumpIntervals()); // Search for allocas which are used outside of the declared lifetime // markers. @@ -1224,7 +1233,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { }); for (auto &s : LiveStarts) - std::sort(s.begin(), s.end()); + llvm::sort(s.begin(), s.end()); bool Changed = true; while (Changed) { @@ -1259,8 +1268,8 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { SlotRemap[SecondSlot] = FirstSlot; SortedSlots[J] = -1; - DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<< - SecondSlot<<" together.\n"); + LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #" + << SecondSlot << " together.\n"); unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot), MFI->getObjectAlignment(SecondSlot)); @@ -1280,8 +1289,8 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // Record statistics. StackSpaceSaved += ReducedSize; StackSlotMerged += RemovedSlots; - DEBUG(dbgs()<<"Merge "<<RemovedSlots<<" slots. Saved "<< - ReducedSize<<" bytes\n"); + LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots << " slots. Saved " + << ReducedSize << " bytes\n"); // Scan the entire function and update all machine operands that use frame // indices to use the remapped frame index. diff --git a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp index cc9af92c395f..00cf8070be5e 100644 --- a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -39,7 +39,7 @@ STATISTIC(NumBBsHaveNoStackmap, "Number of basic blocks with no stackmap"); STATISTIC(NumStackMaps, "Number of StackMaps visited"); namespace { -/// \brief This pass calculates the liveness information for each basic block in +/// This pass calculates the liveness information for each basic block in /// a function and attaches the register live-out information to a patchpoint /// intrinsic if present. /// @@ -54,10 +54,10 @@ class StackMapLiveness : public MachineFunctionPass { public: static char ID; - /// \brief Default construct and initialize the pass. + /// Default construct and initialize the pass. StackMapLiveness(); - /// \brief Tell the pass manager which passes we depend on and what + /// Tell the pass manager which passes we depend on and what /// information we preserve. void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -66,17 +66,17 @@ public: MachineFunctionProperties::Property::NoVRegs); } - /// \brief Calculate the liveness information for the given machine function. + /// Calculate the liveness information for the given machine function. bool runOnMachineFunction(MachineFunction &MF) override; private: - /// \brief Performs the actual liveness calculation for the function. + /// Performs the actual liveness calculation for the function. bool calculateLiveness(MachineFunction &MF); - /// \brief Add the current register live set to the instruction. + /// Add the current register live set to the instruction. 
void addLiveOutSetToMI(MachineFunction &MF, MachineInstr &MI); - /// \brief Create a register mask and initialize it with the registers from + /// Create a register mask and initialize it with the registers from /// the register live set. uint32_t *createRegisterMask(MachineFunction &MF) const; }; @@ -106,8 +106,8 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) { if (!EnablePatchPointLiveness) return false; - DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << MF.getName() - << " **********\n"); + LLVM_DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " + << MF.getName() << " **********\n"); TRI = MF.getSubtarget().getRegisterInfo(); ++NumStackMapFuncVisited; @@ -124,7 +124,7 @@ bool StackMapLiveness::calculateLiveness(MachineFunction &MF) { bool HasChanged = false; // For all basic blocks in the function. for (auto &MBB : MF) { - DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n"); + LLVM_DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n"); LiveRegs.init(*TRI); // FIXME: This should probably be addLiveOuts(). LiveRegs.addLiveOutsNoPristines(MBB); @@ -138,7 +138,7 @@ bool StackMapLiveness::calculateLiveness(MachineFunction &MF) { HasStackMap = true; ++NumStackMaps; } - DEBUG(dbgs() << " " << LiveRegs << " " << *I); + LLVM_DEBUG(dbgs() << " " << LiveRegs << " " << *I); LiveRegs.stepBackward(*I); } ++NumBBsVisited; @@ -160,7 +160,7 @@ void StackMapLiveness::addLiveOutSetToMI(MachineFunction &MF, /// register live set. uint32_t *StackMapLiveness::createRegisterMask(MachineFunction &MF) const { // The mask is owned and cleaned up by the Machine Function. - uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs()); + uint32_t *Mask = MF.allocateRegMask(); for (auto Reg : LiveRegs) Mask[Reg / 32] |= 1U << (Reg % 32); diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp index e66a25bec911..19a191c01db9 100644 --- a/contrib/llvm/lib/CodeGen/StackMaps.cpp +++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp @@ -268,11 +268,11 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { // in the list. Merge entries that refer to the same dwarf register and use // the maximum size that needs to be spilled. - std::sort(LiveOuts.begin(), LiveOuts.end(), - [](const LiveOutReg &LHS, const LiveOutReg &RHS) { - // Only sort by the dwarf register number. - return LHS.DwarfRegNum < RHS.DwarfRegNum; - }); + llvm::sort(LiveOuts.begin(), LiveOuts.end(), + [](const LiveOutReg &LHS, const LiveOutReg &RHS) { + // Only sort by the dwarf register number. + return LHS.DwarfRegNum < RHS.DwarfRegNum; + }); for (auto I = LiveOuts.begin(), E = LiveOuts.end(); I != E; ++I) { for (auto II = std::next(I); II != E; ++II) { @@ -420,13 +420,13 @@ void StackMaps::emitStackmapHeader(MCStreamer &OS) { OS.EmitIntValue(0, 2); // Reserved. // Num functions. - DEBUG(dbgs() << WSMP << "#functions = " << FnInfos.size() << '\n'); + LLVM_DEBUG(dbgs() << WSMP << "#functions = " << FnInfos.size() << '\n'); OS.EmitIntValue(FnInfos.size(), 4); // Num constants. - DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n'); + LLVM_DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n'); OS.EmitIntValue(ConstPool.size(), 4); // Num callsites. 
- DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n'); + LLVM_DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n'); OS.EmitIntValue(CSInfos.size(), 4); } @@ -439,11 +439,11 @@ void StackMaps::emitStackmapHeader(MCStreamer &OS) { /// } void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) { // Function Frame records. - DEBUG(dbgs() << WSMP << "functions:\n"); + LLVM_DEBUG(dbgs() << WSMP << "functions:\n"); for (auto const &FR : FnInfos) { - DEBUG(dbgs() << WSMP << "function addr: " << FR.first - << " frame size: " << FR.second.StackSize - << " callsite count: " << FR.second.RecordCount << '\n'); + LLVM_DEBUG(dbgs() << WSMP << "function addr: " << FR.first + << " frame size: " << FR.second.StackSize + << " callsite count: " << FR.second.RecordCount << '\n'); OS.EmitSymbolValue(FR.first, 8); OS.EmitIntValue(FR.second.StackSize, 8); OS.EmitIntValue(FR.second.RecordCount, 8); @@ -455,9 +455,9 @@ void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) { /// int64 : Constants[NumConstants] void StackMaps::emitConstantPoolEntries(MCStreamer &OS) { // Constant pool entries. - DEBUG(dbgs() << WSMP << "constants:\n"); + LLVM_DEBUG(dbgs() << WSMP << "constants:\n"); for (const auto &ConstEntry : ConstPool) { - DEBUG(dbgs() << WSMP << ConstEntry.second << '\n'); + LLVM_DEBUG(dbgs() << WSMP << ConstEntry.second << '\n'); OS.EmitIntValue(ConstEntry.second, 8); } } @@ -492,7 +492,7 @@ void StackMaps::emitConstantPoolEntries(MCStreamer &OS) { /// 0x4, Constant, Offset (small constant) /// 0x5, ConstIndex, Constants[Offset] (large constant) void StackMaps::emitCallsiteEntries(MCStreamer &OS) { - DEBUG(print(dbgs())); + LLVM_DEBUG(print(dbgs())); // Callsite entries. for (const auto &CSI : CSInfos) { const LocationVec &CSLocs = CSI.Locations; @@ -569,7 +569,7 @@ void StackMaps::serializeToStackMapSection() { OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_StackMaps"))); // Serialize data. - DEBUG(dbgs() << "********** Stack Map Output **********\n"); + LLVM_DEBUG(dbgs() << "********** Stack Map Output **********\n"); emitStackmapHeader(OS); emitFunctionFrameRecords(OS); emitConstantPoolEntries(OS); diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp index 62cef95a4af2..cb12c7ce6e82 100644 --- a/contrib/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" @@ -69,32 +70,6 @@ INITIALIZE_PASS_END(StackProtector, DEBUG_TYPE, FunctionPass *llvm::createStackProtectorPass() { return new StackProtector(); } -StackProtector::SSPLayoutKind -StackProtector::getSSPLayout(const AllocaInst *AI) const { - return AI ? Layout.lookup(AI) : SSPLK_None; -} - -void StackProtector::adjustForColoring(const AllocaInst *From, - const AllocaInst *To) { - // When coloring replaces one alloca with another, transfer the SSPLayoutKind - // tag from the remapped to the target alloca. The remapped alloca should - // have a size smaller than or equal to the replacement alloca. 
- SSPLayoutMap::iterator I = Layout.find(From); - if (I != Layout.end()) { - SSPLayoutKind Kind = I->second; - Layout.erase(I); - - // Transfer the tag, but make sure that SSPLK_AddrOf does not overwrite - // SSPLK_SmallArray or SSPLK_LargeArray, and make sure that - // SSPLK_SmallArray does not overwrite SSPLK_LargeArray. - I = Layout.find(To); - if (I == Layout.end()) - Layout.insert(std::make_pair(To, Kind)); - else if (I->second != SSPLK_LargeArray && Kind != SSPLK_AddrOf) - I->second = Kind; - } -} - void StackProtector::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetPassConfig>(); AU.addPreserved<DominatorTreeWrapperPass>(); @@ -182,6 +157,14 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge, return NeedsProtector; } +static bool isLifetimeInst(const Instruction *I) { + if (const auto Intrinsic = dyn_cast<IntrinsicInst>(I)) { + const auto Id = Intrinsic->getIntrinsicID(); + return Id == Intrinsic::lifetime_start || Id == Intrinsic::lifetime_end; + } + return false; +} + bool StackProtector::HasAddressTaken(const Instruction *AI) { for (const User *U : AI->users()) { if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { @@ -190,8 +173,10 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) { } else if (const PtrToIntInst *SI = dyn_cast<PtrToIntInst>(U)) { if (AI == SI->getOperand(0)) return true; - } else if (isa<CallInst>(U)) { - return true; + } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { + // Ignore intrinsics that are not calls. TODO: Use isLoweredToCall(). + if (!isa<DbgInfoIntrinsic>(CI) && !isLifetimeInst(CI)) + return true; } else if (isa<InvokeInst>(U)) { return true; } else if (const SelectInst *SI = dyn_cast<SelectInst>(U)) { @@ -214,7 +199,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) { return false; } -/// \brief Check whether or not this function needs a stack protector based +/// Check whether or not this function needs a stack protector based /// upon the stack protector level. /// /// We use two heuristics: a standard (ssp) and strong (sspstrong). @@ -278,18 +263,21 @@ bool StackProtector::RequiresStackProtector() { if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) { // A call to alloca with size >= SSPBufferSize requires // stack protectors. - Layout.insert(std::make_pair(AI, SSPLK_LargeArray)); + Layout.insert(std::make_pair(AI, + MachineFrameInfo::SSPLK_LargeArray)); ORE.emit(RemarkBuilder); NeedsProtector = true; } else if (Strong) { // Require protectors for all alloca calls in strong mode. - Layout.insert(std::make_pair(AI, SSPLK_SmallArray)); + Layout.insert(std::make_pair(AI, + MachineFrameInfo::SSPLK_SmallArray)); ORE.emit(RemarkBuilder); NeedsProtector = true; } } else { // A call to alloca with a variable size requires protectors. - Layout.insert(std::make_pair(AI, SSPLK_LargeArray)); + Layout.insert(std::make_pair(AI, + MachineFrameInfo::SSPLK_LargeArray)); ORE.emit(RemarkBuilder); NeedsProtector = true; } @@ -298,8 +286,9 @@ bool StackProtector::RequiresStackProtector() { bool IsLarge = false; if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) { - Layout.insert(std::make_pair(AI, IsLarge ? SSPLK_LargeArray - : SSPLK_SmallArray)); + Layout.insert(std::make_pair(AI, IsLarge + ? 
MachineFrameInfo::SSPLK_LargeArray + : MachineFrameInfo::SSPLK_SmallArray)); ORE.emit([&]() { return OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I) << "Stack protection applied to function " @@ -313,7 +302,7 @@ bool StackProtector::RequiresStackProtector() { if (Strong && HasAddressTaken(AI)) { ++NumAddrTaken; - Layout.insert(std::make_pair(AI, SSPLK_AddrOf)); + Layout.insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf)); ORE.emit([&]() { return OptimizationRemark(DEBUG_TYPE, "StackProtectorAddressTaken", &I) @@ -523,3 +512,23 @@ BasicBlock *StackProtector::CreateFailBB() { bool StackProtector::shouldEmitSDCheck(const BasicBlock &BB) const { return HasPrologue && !HasIRCheck && dyn_cast<ReturnInst>(BB.getTerminator()); } + +void StackProtector::copyToMachineFrameInfo(MachineFrameInfo &MFI) const { + if (Layout.empty()) + return; + + for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) { + if (MFI.isDeadObjectIndex(I)) + continue; + + const AllocaInst *AI = MFI.getObjectAllocation(I); + if (!AI) + continue; + + SSPLayoutMap::const_iterator LI = Layout.find(AI); + if (LI == Layout.end()) + continue; + + MFI.setObjectSSPLayout(I, LI->second); + } +} diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp index 8fc7a4a32842..eb15b15a24a6 100644 --- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -82,14 +82,14 @@ namespace { // AllColors - If index is set, it's a spill slot, i.e. color. // FIXME: This assumes PEI locate spill slot with smaller indices // closest to stack pointer / frame pointer. Therefore, smaller - // index == better color. - BitVector AllColors; + // index == better color. This is per stack ID. + SmallVector<BitVector, 2> AllColors; - // NextColor - Next "color" that's not yet used. - int NextColor = -1; + // NextColor - Next "color" that's not yet used. This is per stack ID. + SmallVector<int, 2> NextColors = { -1 }; - // UsedColors - "Colors" that have been assigned. - BitVector UsedColors; + // UsedColors - "Colors" that have been assigned. This is per stack ID + SmallVector<BitVector, 2> UsedColors; // Assignments - Color to intervals mapping. SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments; @@ -196,10 +196,15 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { /// to a sorted (by weight) list. void StackSlotColoring::InitializeSlots() { int LastFI = MFI->getObjectIndexEnd(); + + // There is always at least one stack ID. + AllColors.resize(1); + UsedColors.resize(1); + OrigAlignments.resize(LastFI); OrigSizes.resize(LastFI); - AllColors.resize(LastFI); - UsedColors.resize(LastFI); + AllColors[0].resize(LastFI); + UsedColors[0].resize(LastFI); Assignments.resize(LastFI); using Pair = std::iterator_traits<LiveStacks::iterator>::value_type; @@ -209,29 +214,42 @@ void StackSlotColoring::InitializeSlots() { Intervals.reserve(LS->getNumIntervals()); for (auto &I : *LS) Intervals.push_back(&I); - std::sort(Intervals.begin(), Intervals.end(), - [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; }); + llvm::sort(Intervals.begin(), Intervals.end(), + [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; }); // Gather all spill slots into a list. 
- DEBUG(dbgs() << "Spill slot intervals:\n"); + LLVM_DEBUG(dbgs() << "Spill slot intervals:\n"); for (auto *I : Intervals) { LiveInterval &li = I->second; - DEBUG(li.dump()); + LLVM_DEBUG(li.dump()); int FI = TargetRegisterInfo::stackSlot2Index(li.reg); if (MFI->isDeadObjectIndex(FI)) continue; + SSIntervals.push_back(&li); OrigAlignments[FI] = MFI->getObjectAlignment(FI); OrigSizes[FI] = MFI->getObjectSize(FI); - AllColors.set(FI); + + auto StackID = MFI->getStackID(FI); + if (StackID != 0) { + AllColors.resize(StackID + 1); + UsedColors.resize(StackID + 1); + AllColors[StackID].resize(LastFI); + UsedColors[StackID].resize(LastFI); + } + + AllColors[StackID].set(FI); } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); // Sort them by weight. std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter()); + NextColors.resize(AllColors.size()); + // Get first "color". - NextColor = AllColors.find_first(); + for (unsigned I = 0, E = AllColors.size(); I != E; ++I) + NextColors[I] = AllColors[I].find_first(); } /// OverlapWithAssignments - Return true if LiveInterval overlaps with any @@ -252,37 +270,41 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) { int Color = -1; bool Share = false; int FI = TargetRegisterInfo::stackSlot2Index(li->reg); + uint8_t StackID = MFI->getStackID(FI); if (!DisableSharing) { + // Check if it's possible to reuse any of the used colors. - Color = UsedColors.find_first(); + Color = UsedColors[StackID].find_first(); while (Color != -1) { if (!OverlapWithAssignments(li, Color)) { Share = true; ++NumEliminated; break; } - Color = UsedColors.find_next(Color); + Color = UsedColors[StackID].find_next(Color); } } if (Color != -1 && MFI->getStackID(Color) != MFI->getStackID(FI)) { - DEBUG(dbgs() << "cannot share FIs with different stack IDs\n"); + LLVM_DEBUG(dbgs() << "cannot share FIs with different stack IDs\n"); Share = false; } // Assign it to the first available color (assumed to be the best) if it's // not possible to share a used color with other objects. if (!Share) { - assert(NextColor != -1 && "No more spill slots?"); - Color = NextColor; - UsedColors.set(Color); - NextColor = AllColors.find_next(NextColor); + assert(NextColors[StackID] != -1 && "No more spill slots?"); + Color = NextColors[StackID]; + UsedColors[StackID].set(Color); + NextColors[StackID] = AllColors[StackID].find_next(NextColors[StackID]); } + assert(MFI->getStackID(Color) == MFI->getStackID(FI)); + // Record the assignment. Assignments[Color].push_back(li); - DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n"); + LLVM_DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n"); // Change size and alignment of the allocated slot. 
If there are multiple // objects sharing the same slot, then make sure the size and alignment @@ -305,7 +327,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs); BitVector UsedColors(NumObjs); - DEBUG(dbgs() << "Color spill slot intervals:\n"); + LLVM_DEBUG(dbgs() << "Color spill slot intervals:\n"); bool Changed = false; for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; @@ -319,7 +341,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { Changed |= (SS != NewSS); } - DEBUG(dbgs() << "\nSpill slots after coloring:\n"); + LLVM_DEBUG(dbgs() << "\nSpill slots after coloring:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; int SS = TargetRegisterInfo::stackSlot2Index(li->reg); @@ -330,8 +352,8 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { #ifndef NDEBUG for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) - DEBUG(SSIntervals[i]->dump()); - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(SSIntervals[i]->dump()); + LLVM_DEBUG(dbgs() << '\n'); #endif if (!Changed) @@ -357,10 +379,13 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { } // Delete unused stack slots. - while (NextColor != -1) { - DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n"); - MFI->RemoveStackObject(NextColor); - NextColor = AllColors.find_next(NextColor); + for (int StackID = 0, E = AllColors.size(); StackID != E; ++StackID) { + int NextColor = NextColors[StackID]; + while (NextColor != -1) { + LLVM_DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n"); + MFI->RemoveStackObject(NextColor); + NextColor = AllColors[StackID].find_next(NextColor); + } } return true; @@ -382,6 +407,8 @@ void StackSlotColoring::RewriteInstruction(MachineInstr &MI, int NewFI = SlotMapping[OldFI]; if (NewFI == -1 || NewFI == OldFI) continue; + + assert(MFI->getStackID(OldFI) == MFI->getStackID(NewFI)); MO.setIndex(NewFI); } @@ -418,17 +445,21 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { unsigned LoadReg = 0; unsigned StoreReg = 0; - if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS))) + unsigned LoadSize = 0; + unsigned StoreSize = 0; + if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS, LoadSize))) continue; // Skip the ...pseudo debugging... instructions between a load and store. 
- while ((NextMI != E) && NextMI->isDebugValue()) { + while ((NextMI != E) && NextMI->isDebugInstr()) { ++NextMI; ++I; } if (NextMI == E) continue; - if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS))) + if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS, StoreSize))) + continue; + if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1 || + LoadSize != StoreSize) continue; - if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue; ++NumDead; changed = true; @@ -450,10 +481,13 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { } bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { - DEBUG({ - dbgs() << "********** Stack Slot Coloring **********\n" - << "********** Function: " << MF.getName() << '\n'; - }); + LLVM_DEBUG({ + dbgs() << "********** Stack Slot Coloring **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + if (skipFunction(MF.getFunction())) + return false; MFI = &MF.getFrameInfo(); TII = MF.getSubtarget().getInstrInfo(); @@ -479,7 +513,9 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { InitializeSlots(); Changed = ColorSlots(MF); - NextColor = -1; + for (int &Next : NextColors) + Next = -1; + SSIntervals.clear(); for (unsigned i = 0, e = SSRefs.size(); i != e; ++i) SSRefs[i].clear(); diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp index df1eebf43b2b..25cd7802264e 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -7,8 +7,9 @@ // //===----------------------------------------------------------------------===// // -// This pass duplicates basic blocks ending in unconditional branches into -// the tails of their predecessors, using the TailDuplicator utility class. +/// \file This pass duplicates basic blocks ending in unconditional branches +/// into the tails of their predecessors, using the TailDuplicator utility +/// class. // //===----------------------------------------------------------------------===// @@ -26,38 +27,55 @@ using namespace llvm; namespace { -/// Perform tail duplication. 
Delegates to TailDuplicator -class TailDuplicatePass : public MachineFunctionPass { +class TailDuplicateBase : public MachineFunctionPass { TailDuplicator Duplicator; - + bool PreRegAlloc; public: - static char ID; - - explicit TailDuplicatePass() : MachineFunctionPass(ID) {} + TailDuplicateBase(char &PassID, bool PreRegAlloc) + : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {} bool runOnMachineFunction(MachineFunction &MF) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineBranchProbabilityInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +class TailDuplicate : public TailDuplicateBase { +public: + static char ID; + TailDuplicate() : TailDuplicateBase(ID, false) { + initializeTailDuplicatePass(*PassRegistry::getPassRegistry()); + } +}; + +class EarlyTailDuplicate : public TailDuplicateBase { +public: + static char ID; + EarlyTailDuplicate() : TailDuplicateBase(ID, true) { + initializeEarlyTailDuplicatePass(*PassRegistry::getPassRegistry()); + } }; } // end anonymous namespace -char TailDuplicatePass::ID = 0; +char TailDuplicate::ID; +char EarlyTailDuplicate::ID; -char &llvm::TailDuplicateID = TailDuplicatePass::ID; +char &llvm::TailDuplicateID = TailDuplicate::ID; +char &llvm::EarlyTailDuplicateID = EarlyTailDuplicate::ID; -INITIALIZE_PASS(TailDuplicatePass, DEBUG_TYPE, "Tail Duplication", false, false) +INITIALIZE_PASS(TailDuplicate, DEBUG_TYPE, "Tail Duplication", false, false) +INITIALIZE_PASS(EarlyTailDuplicate, "early-tailduplication", + "Early Tail Duplication", false, false) -bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { +bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); - - // TODO: Querying isSSA() to determine pre-/post-regalloc is fragile, better - // split this into two passes instead. - bool PreRegAlloc = MF.getRegInfo().isSSA(); - Duplicator.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ false); + Duplicator.initMF(MF, PreRegAlloc, MBPI, /*LayoutMode=*/false); bool MadeChange = false; while (Duplicator.tailDuplicateBlocks()) @@ -65,8 +83,3 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { return MadeChange; } - -void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineBranchProbabilityInfo>(); - MachineFunctionPass::getAnalysisUsage(AU); -} diff --git a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp index f51c884839b3..b118c176a897 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp @@ -37,6 +37,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> #include <iterator> @@ -261,7 +262,7 @@ bool TailDuplicator::tailDuplicateBlocks() { bool MadeChange = false; if (PreRegAlloc && TailDupVerify) { - DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); + LLVM_DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); VerifyPHIs(*MF, true); } @@ -371,6 +372,13 @@ void TailDuplicator::duplicateInstruction( MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, DenseMap<unsigned, RegSubRegPair> &LocalVRMap, const DenseSet<unsigned> &UsedByPhi) { + // Allow duplication of CFI instructions. 
+ if (MI->isCFIInstruction()) { + BuildMI(*PredBB, PredBB->end(), PredBB->findDebugLoc(PredBB->begin()), + TII->get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex( + MI->getOperand(0).getCFIIndex()); + return; + } MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI); if (PreRegAlloc) { for (unsigned i = 0, e = NewMI.getNumOperands(); i != e; ++i) { @@ -585,7 +593,13 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, unsigned InstrCount = 0; for (MachineInstr &MI : TailBB) { // Non-duplicable things shouldn't be tail-duplicated. - if (MI.isNotDuplicable()) + // CFI instructions are marked as non-duplicable, because Darwin compact + // unwind info emission can't handle multiple prologue setups. In case of + // DWARF, allow them be duplicated, so that their existence doesn't prevent + // tail duplication of some basic blocks, that would be duplicated otherwise. + if (MI.isNotDuplicable() && + (TailBB.getParent()->getTarget().getTargetTriple().isOSDarwin() || + !MI.isCFIInstruction())) return false; // Convergent instructions can be duplicated only if doing so doesn't add @@ -605,7 +619,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (PreRegAlloc && MI.isCall()) return false; - if (!MI.isPHI() && !MI.isDebugValue()) + if (!MI.isPHI() && !MI.isMetaInstruction()) InstrCount += 1; if (InstrCount > MaxDuplicateCount) @@ -704,8 +718,8 @@ bool TailDuplicator::duplicateSimpleBB( continue; Changed = true; - DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB - << "From simple Succ: " << *TailBB); + LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB + << "From simple Succ: " << *TailBB); MachineBasicBlock *NewTarget = *TailBB->succ_begin(); MachineBasicBlock *NextBB = PredBB->getNextNode(); @@ -785,8 +799,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl<MachineBasicBlock *> &TDBBs, SmallVectorImpl<MachineInstr *> &Copies) { - DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB) - << '\n'); + LLVM_DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB) + << '\n'); DenseSet<unsigned> UsedByPhi; getRegsUsedByPHIs(*TailBB, &UsedByPhi); @@ -816,8 +830,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, if (IsLayoutSuccessor) continue; - DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB - << "From Succ: " << *TailBB); + LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB + << "From Succ: " << *TailBB); TDBBs.push_back(PredBB); @@ -879,8 +893,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, (!PriorTBB || PriorTBB == TailBB) && TailBB->pred_size() == 1 && !TailBB->hasAddressTaken()) { - DEBUG(dbgs() << "\nMerging into block: " << *PrevBB - << "From MBB: " << *TailBB); + LLVM_DEBUG(dbgs() << "\nMerging into block: " << *PrevBB + << "From MBB: " << *TailBB); // There may be a branch to the layout successor. This is unlikely but it // happens. The correct thing to do is to remove the branch before // duplicating the instructions in all cases. 
@@ -985,7 +999,7 @@ void TailDuplicator::removeDeadBlock( MachineBasicBlock *MBB, function_ref<void(MachineBasicBlock *)> *RemovalCallback) { assert(MBB->pred_empty() && "MBB must be dead!"); - DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); + LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); if (RemovalCallback) (*RemovalCallback)(MBB); diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index b2151eb49655..f0cfa2fbe4fd 100644 --- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -36,6 +36,13 @@ bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const { return Attr.getValueAsString() == "true"; } +bool TargetFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const { + assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) && + MF.getFunction().hasFnAttribute(Attribute::NoUnwind) && + !MF.getFunction().hasFnAttribute(Attribute::UWTable)); + return false; +} + /// Returns the displacement from the frame register to the stack /// frame of the specified index, along with the frame register used /// (in output arg FrameReg). This is the default implementation which @@ -85,6 +92,19 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, if (MF.getFunction().hasFnAttribute(Attribute::Naked)) return; + // Noreturn+nounwind functions never restore CSR, so no saves are needed. + // Purely noreturn functions may still return through throws, so those must + // save CSR for caller exception handlers. + // + // If the function uses longjmp to break out of its current path of + // execution we do not need the CSR spills either: setjmp stores all CSRs + // it was called with into the jmp_buf, which longjmp then restores. + if (MF.getFunction().hasFnAttribute(Attribute::NoReturn) && + MF.getFunction().hasFnAttribute(Attribute::NoUnwind) && + !MF.getFunction().hasFnAttribute(Attribute::UWTable) && + enableCalleeSaveSkip(MF)) + return; + // Functions which call __builtin_unwind_init get all their registers saved. bool CallsUnwindInit = MF.callsUnwindInit(); const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -100,7 +120,16 @@ unsigned TargetFrameLowering::getStackAlignmentSkew( // When HHVM function is called, the stack is skewed as the return address // is removed from the stack before we enter the function. if (LLVM_UNLIKELY(MF.getFunction().getCallingConv() == CallingConv::HHVM)) - return MF.getTarget().getPointerSize(); + return MF.getTarget().getAllocaPointerSize(); return 0; } + +int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { + llvm_unreachable("getInitialCFAOffset() not implemented!"); +} + +unsigned TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF) + const { + llvm_unreachable("getInitialCFARegister() not implemented!"); +}
\ No newline at end of file diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp index bd90ed5b55b8..963f8178b509 100644 --- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -174,6 +174,14 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool Reg2IsUndef = MI.getOperand(Idx2).isUndef(); bool Reg1IsInternal = MI.getOperand(Idx1).isInternalRead(); bool Reg2IsInternal = MI.getOperand(Idx2).isInternalRead(); + // Avoid calling isRenamable for virtual registers since we assert that + // renamable property is only queried/set for physical registers. + bool Reg1IsRenamable = TargetRegisterInfo::isPhysicalRegister(Reg1) + ? MI.getOperand(Idx1).isRenamable() + : false; + bool Reg2IsRenamable = TargetRegisterInfo::isPhysicalRegister(Reg2) + ? MI.getOperand(Idx2).isRenamable() + : false; // If destination is tied to either of the commuted source register, then // it must be updated. if (HasDef && Reg0 == Reg1 && @@ -211,6 +219,12 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, CommutedMI->getOperand(Idx1).setIsUndef(Reg2IsUndef); CommutedMI->getOperand(Idx2).setIsInternalRead(Reg1IsInternal); CommutedMI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal); + // Avoid calling setIsRenamable for virtual registers since we assert that + // renamable property is only queried/set for physical registers. + if (TargetRegisterInfo::isPhysicalRegister(Reg1)) + CommutedMI->getOperand(Idx2).setIsRenamable(Reg1IsRenamable); + if (TargetRegisterInfo::isPhysicalRegister(Reg2)) + CommutedMI->getOperand(Idx1).setIsRenamable(Reg2IsRenamable); return CommutedMI; } diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp index b29a33ac1c14..43f4bad595e3 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -28,7 +28,6 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetLowering.h" @@ -50,6 +49,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" #include <algorithm> @@ -118,7 +118,7 @@ static cl::opt<int> MinPercentageForPredictableBranch( void TargetLoweringBase::InitLibcalls(const Triple &TT) { #define HANDLE_LIBCALL(code, name) \ setLibcallName(RTLIB::code, name); -#include "llvm/CodeGen/RuntimeLibcalls.def" +#include "llvm/IR/RuntimeLibcalls.def" #undef HANDLE_LIBCALL // Initialize calling conventions to their default. 
for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC) @@ -192,6 +192,9 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { return FPEXT_F64_F128; else if (RetVT == MVT::ppcf128) return FPEXT_F64_PPCF128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::f128) + return FPEXT_F80_F128; } return UNKNOWN_LIBCALL; @@ -227,6 +230,9 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { return FPROUND_F128_F64; if (OpVT == MVT::ppcf128) return FPROUND_PPCF128_F64; + } else if (RetVT == MVT::f80) { + if (OpVT == MVT::f128) + return FPROUND_F128_F80; } return UNKNOWN_LIBCALL; @@ -529,6 +535,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { // Perform these initializations only once. MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = MaxLoadsPerMemcmp = 8; + MaxGluedStoresPerMemcpy = 0; MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4; UseUnderscoreSetJmp = false; @@ -614,6 +621,12 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SUBCARRY, VT, Expand); setOperationAction(ISD::SETCCCARRY, VT, Expand); + // ADDC/ADDE/SUBC/SUBE default to expand. + setOperationAction(ISD::ADDC, VT, Expand); + setOperationAction(ISD::ADDE, VT, Expand); + setOperationAction(ISD::SUBC, VT, Expand); + setOperationAction(ISD::SUBE, VT, Expand); + // These default to Expand so they will be expanded to CTLZ/CTTZ by default. setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); @@ -679,12 +692,13 @@ MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL, return MVT::getIntegerVT(8 * DL.getPointerSize(0)); } -EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, - const DataLayout &DL) const { +EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL, + bool LegalTypes) const { assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); if (LHSTy.isVector()) return LHSTy; - return getScalarShiftAmountTy(DL, LHSTy); + return LegalTypes ? getScalarShiftAmountTy(DL, LHSTy) + : getPointerTy(DL); } bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { @@ -979,6 +993,36 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, return MBB; } +MachineBasicBlock * +TargetLoweringBase::emitXRayCustomEvent(MachineInstr &MI, + MachineBasicBlock *MBB) const { + assert(MI.getOpcode() == TargetOpcode::PATCHABLE_EVENT_CALL && + "Called emitXRayCustomEvent on the wrong MI!"); + auto &MF = *MI.getMF(); + auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc()); + for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx) + MIB.add(MI.getOperand(OpIdx)); + + MBB->insert(MachineBasicBlock::iterator(MI), MIB); + MI.eraseFromParent(); + return MBB; +} + +MachineBasicBlock * +TargetLoweringBase::emitXRayTypedEvent(MachineInstr &MI, + MachineBasicBlock *MBB) const { + assert(MI.getOpcode() == TargetOpcode::PATCHABLE_TYPED_EVENT_CALL && + "Called emitXRayTypedEvent on the wrong MI!"); + auto &MF = *MI.getMF(); + auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc()); + for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx) + MIB.add(MI.getOperand(OpIdx)); + + MBB->insert(MachineBasicBlock::iterator(MI), MIB); + MI.eraseFromParent(); + return MBB; +} + /// findRepresentativeClass - Return the largest legal super-reg register class /// of the register class for the specified type and its associated "cost". 
// This function is in TargetLowering because it uses RegClassForVT which would @@ -1587,13 +1631,16 @@ Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const { // Currently only support "standard" __stack_chk_guard. // TODO: add LOAD_STACK_GUARD support. void TargetLoweringBase::insertSSPDeclarations(Module &M) const { - M.getOrInsertGlobal("__stack_chk_guard", Type::getInt8PtrTy(M.getContext())); + if (!M.getNamedValue("__stack_chk_guard")) + new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false, + GlobalVariable::ExternalLinkage, + nullptr, "__stack_chk_guard"); } // Currently only support "standard" __stack_chk_guard. // TODO: add LOAD_STACK_GUARD support. Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const { - return M.getGlobalVariable("__stack_chk_guard", true); + return M.getNamedValue("__stack_chk_guard"); } Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { @@ -1683,7 +1730,7 @@ static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) { return TargetLoweringBase::ReciprocalEstimate::Unspecified; SmallVector<StringRef, 4> OverrideVector; - SplitString(Override, OverrideVector, ","); + Override.split(OverrideVector, ','); unsigned NumArgs = OverrideVector.size(); // Check if "all", "none", or "default" was specified. @@ -1743,7 +1790,7 @@ static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) { return TargetLoweringBase::ReciprocalEstimate::Unspecified; SmallVector<StringRef, 4> OverrideVector; - SplitString(Override, OverrideVector, ","); + Override.split(OverrideVector, ','); unsigned NumArgs = OverrideVector.size(); // Check if "all", "default", or "none" was specified. diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 24d4baa31e1f..b5dd2d4cca89 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -91,23 +91,86 @@ static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags, // ELF //===----------------------------------------------------------------------===// -void TargetLoweringObjectFileELF::emitModuleMetadata( - MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { +void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, + const TargetMachine &TgtM) { + TargetLoweringObjectFile::Initialize(Ctx, TgtM); + TM = &TgtM; +} + +void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, + Module &M) const { + auto &C = getContext(); + + if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { + auto *S = C.getELFSection(".linker-options", ELF::SHT_LLVM_LINKER_OPTIONS, + ELF::SHF_EXCLUDE); + + Streamer.SwitchSection(S); + + for (const auto &Operand : LinkerOptions->operands()) { + if (cast<MDNode>(Operand)->getNumOperands() != 2) + report_fatal_error("invalid llvm.linker.options"); + for (const auto &Option : cast<MDNode>(Operand)->operands()) { + Streamer.EmitBytes(cast<MDString>(Option)->getString()); + Streamer.EmitIntValue(0, 1); + } + } + } + unsigned Version = 0; unsigned Flags = 0; StringRef Section; GetObjCImageInfo(M, Version, Flags, Section); - if (Section.empty()) + if (!Section.empty()) { + auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + Streamer.SwitchSection(S); + Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); + Streamer.EmitIntValue(Version, 4); + Streamer.EmitIntValue(Flags, 4); + 
Streamer.AddBlankLine(); + } + + SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; + M.getModuleFlagsMetadata(ModuleFlags); + + MDNode *CFGProfile = nullptr; + + for (const auto &MFE : ModuleFlags) { + StringRef Key = MFE.Key->getString(); + if (Key == "CG Profile") { + CFGProfile = cast<MDNode>(MFE.Val); + break; + } + } + + if (!CFGProfile) return; - auto &C = getContext(); - auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC); - Streamer.SwitchSection(S); - Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); - Streamer.EmitIntValue(Version, 4); - Streamer.EmitIntValue(Flags, 4); - Streamer.AddBlankLine(); + auto GetSym = [this](const MDOperand &MDO) -> MCSymbol * { + if (!MDO) + return nullptr; + auto V = cast<ValueAsMetadata>(MDO); + const Function *F = cast<Function>(V->getValue()); + return TM->getSymbol(F); + }; + + for (const auto &Edge : CFGProfile->operands()) { + MDNode *E = cast<MDNode>(Edge); + const MCSymbol *From = GetSym(E->getOperand(0)); + const MCSymbol *To = GetSym(E->getOperand(1)); + // Skip null functions. This can happen if functions are dead stripped after + // the CGProfile pass has been run. + if (!From || !To) + continue; + uint64_t Count = cast<ConstantAsMetadata>(E->getOperand(2)) + ->getValue() + ->getUniqueInteger() + .getZExtValue(); + Streamer.emitCGProfileEntry( + MCSymbolRefExpr::create(From, MCSymbolRefExpr::VK_None, C), + MCSymbolRefExpr::create(To, MCSymbolRefExpr::VK_None, C), Count); + } } MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( @@ -170,7 +233,7 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference( } static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { - // N.B.: The defaults used in here are no the same ones used in MC. + // N.B.: The defaults used in here are not the same ones used in MC. // We follow gcc, MC follows gas. For example, given ".section .eh_frame", // both gas and MC will produce a section with no flags. Given // section(".eh_frame") gcc will produce: @@ -183,7 +246,7 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { if (Name.empty() || Name[0] != '.') return K; - // Some lame default implementation based on some magic section names. + // Default implementation based on some magic section names. if (Name == ".bss" || Name.startswith(".bss.") || Name.startswith(".gnu.linkonce.b.") || @@ -335,7 +398,8 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( /*EntrySize=*/0, Group, UniqueID, AssociatedSymbol); // Make sure that we did not get some other section with incompatible sh_link. // This should not be possible due to UniqueID code above. - assert(Section->getAssociatedSymbol() == AssociatedSymbol); + assert(Section->getAssociatedSymbol() == AssociatedSymbol && + "Associated symbol mismatch between sections"); return Section; } @@ -617,8 +681,8 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, } } -void TargetLoweringObjectFileMachO::emitModuleMetadata( - MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { +void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer, + Module &M) const { // Emit the linker options if present. 
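// An illustrative sketch of the module-level metadata the new ELF path above
// consumes; the operands are made up, but the two-string key/value shape per
// node is exactly what the getNumOperands() == 2 check enforces:
//
//   !llvm.linker.options = !{!0}
//   !0 = !{!"lib", !"m"}
//
// Each string is streamed NUL-terminated into the SHT_LLVM_LINKER_OPTIONS
// ".linker-options" section; the Mach-O handler here keeps consuming the same
// named metadata in its existing flag-string form (e.g. !{!"-lz"}).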
if (auto *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { for (const auto &Option : LinkerOptions->operands()) { @@ -727,6 +791,8 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( if (GO->isWeakForLinker()) { if (Kind.isReadOnly()) return ConstTextCoalSection; + if (Kind.isReadOnlyWithRel()) + return ConstDataCoalSection; return DataCoalSection; } @@ -1040,7 +1106,7 @@ MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( Selection); } -static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) { +static StringRef getCOFFSectionNameForUniqueGlobal(SectionKind Kind) { if (Kind.isText()) return ".text"; if (Kind.isBSS()) @@ -1063,7 +1129,8 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( EmitUniquedSection = TM.getDataSections(); if ((EmitUniquedSection && !Kind.isCommon()) || GO->hasComdat()) { - const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); + SmallString<256> Name = getCOFFSectionNameForUniqueGlobal(Kind); + unsigned Characteristics = getCOFFSectionFlags(Kind, TM); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; @@ -1083,6 +1150,12 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( if (!ComdatGV->hasPrivateLinkage()) { MCSymbol *Sym = TM.getSymbol(ComdatGV); StringRef COMDATSymName = Sym->getName(); + + // Append "$symbol" to the section name when targetting mingw. The ld.bfd + // COFF linker will not properly handle comdats otherwise. + if (getTargetTriple().isWindowsGNUEnvironment()) + raw_svector_ostream(Name) << '$' << COMDATSymName; + return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName, Selection, UniqueID); } else { @@ -1140,17 +1213,18 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( StringRef COMDATSymName = Sym->getName(); SectionKind Kind = SectionKind::getReadOnly(); - const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); + StringRef SecName = getCOFFSectionNameForUniqueGlobal(Kind); unsigned Characteristics = getCOFFSectionFlags(Kind, TM); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; unsigned UniqueID = NextUniqueID++; - return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName, - COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID); + return getContext().getCOFFSection( + SecName, Characteristics, Kind, COMDATSymName, + COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID); } -void TargetLoweringObjectFileCOFF::emitModuleMetadata( - MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { +void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer, + Module &M) const { if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { // Emit the linker options to the linker .drectve section. 
According to the // spec, this section is a space-separated string containing flags for @@ -1250,19 +1324,136 @@ void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal( emitLinkerFlagsForGlobalCOFF(OS, GV, getTargetTriple(), getMangler()); } +void TargetLoweringObjectFileCOFF::emitLinkerFlagsForUsed( + raw_ostream &OS, const GlobalValue *GV) const { + emitLinkerFlagsForUsedCOFF(OS, GV, getTargetTriple(), getMangler()); +} + +const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference( + const GlobalValue *LHS, const GlobalValue *RHS, + const TargetMachine &TM) const { + const Triple &T = TM.getTargetTriple(); + if (!T.isKnownWindowsMSVCEnvironment() && + !T.isWindowsItaniumEnvironment() && + !T.isWindowsCoreCLREnvironment()) + return nullptr; + + // Our symbols should exist in address space zero, cowardly no-op if + // otherwise. + if (LHS->getType()->getPointerAddressSpace() != 0 || + RHS->getType()->getPointerAddressSpace() != 0) + return nullptr; + + // Both ptrtoint instructions must wrap global objects: + // - Only global variables are eligible for image relative relocations. + // - The subtrahend refers to the special symbol __ImageBase, a GlobalVariable. + // We expect __ImageBase to be a global variable without a section, externally + // defined. + // + // It should look something like this: @__ImageBase = external constant i8 + if (!isa<GlobalObject>(LHS) || !isa<GlobalVariable>(RHS) || + LHS->isThreadLocal() || RHS->isThreadLocal() || + RHS->getName() != "__ImageBase" || !RHS->hasExternalLinkage() || + cast<GlobalVariable>(RHS)->hasInitializer() || RHS->hasSection()) + return nullptr; + + return MCSymbolRefExpr::create(TM.getSymbol(LHS), + MCSymbolRefExpr::VK_COFF_IMGREL32, + getContext()); +} + +static std::string APIntToHexString(const APInt &AI) { + unsigned Width = (AI.getBitWidth() / 8) * 2; + std::string HexString = utohexstr(AI.getLimitedValue(), /*LowerCase=*/true); + unsigned Size = HexString.size(); + assert(Width >= Size && "hex string is too large!"); + HexString.insert(HexString.begin(), Width - Size, '0'); + + return HexString; +} + +static std::string scalarConstantToHexString(const Constant *C) { + Type *Ty = C->getType(); + if (isa<UndefValue>(C)) { + return APIntToHexString(APInt::getNullValue(Ty->getPrimitiveSizeInBits())); + } else if (const auto *CFP = dyn_cast<ConstantFP>(C)) { + return APIntToHexString(CFP->getValueAPF().bitcastToAPInt()); + } else if (const auto *CI = dyn_cast<ConstantInt>(C)) { + return APIntToHexString(CI->getValue()); + } else { + unsigned NumElements; + if (isa<VectorType>(Ty)) + NumElements = Ty->getVectorNumElements(); + else + NumElements = Ty->getArrayNumElements(); + std::string HexString; + for (int I = NumElements - 1, E = -1; I != E; --I) + HexString += scalarConstantToHexString(C->getAggregateElement(I)); + return HexString; + } +} + +MCSection *TargetLoweringObjectFileCOFF::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C, + unsigned &Align) const { + if (Kind.isMergeableConst() && C && + getContext().getAsmInfo()->hasCOFFComdatConstants()) { + // This creates comdat sections with the given symbol name, but unless + // AsmPrinter::GetCPISymbol actually makes the symbol global, the symbol + // will be created with a null storage class, which makes GNU binutils + // error out. 
+ const unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_LNK_COMDAT; + std::string COMDATSymName; + if (Kind.isMergeableConst4()) { + if (Align <= 4) { + COMDATSymName = "__real@" + scalarConstantToHexString(C); + Align = 4; + } + } else if (Kind.isMergeableConst8()) { + if (Align <= 8) { + COMDATSymName = "__real@" + scalarConstantToHexString(C); + Align = 8; + } + } else if (Kind.isMergeableConst16()) { + // FIXME: These may not be appropriate for non-x86 architectures. + if (Align <= 16) { + COMDATSymName = "__xmm@" + scalarConstantToHexString(C); + Align = 16; + } + } else if (Kind.isMergeableConst32()) { + if (Align <= 32) { + COMDATSymName = "__ymm@" + scalarConstantToHexString(C); + Align = 32; + } + } + + if (!COMDATSymName.empty()) + return getContext().getCOFFSection(".rdata", Characteristics, Kind, + COMDATSymName, + COFF::IMAGE_COMDAT_SELECT_ANY); + } + + return TargetLoweringObjectFile::getSectionForConstant(DL, Kind, C, Align); +} + + //===----------------------------------------------------------------------===// // Wasm //===----------------------------------------------------------------------===// -static void checkWasmComdat(const GlobalValue *GV) { +static const Comdat *getWasmComdat(const GlobalValue *GV) { const Comdat *C = GV->getComdat(); if (!C) - return; + return nullptr; - // TODO(sbc): At some point we may need COMDAT support but currently - // they are not supported. - report_fatal_error("WebAssembly doesn't support COMDATs, '" + C->getName() + - "' cannot be lowered."); + if (C->getSelectionKind() != Comdat::Any) + report_fatal_error("WebAssembly COMDATs only support " + "SelectionKind::Any, '" + C->getName() + "' cannot be " + "lowered."); + + return C; } static SectionKind getWasmKindForNamedSection(StringRef Name, SectionKind K) { @@ -1277,17 +1468,32 @@ static SectionKind getWasmKindForNamedSection(StringRef Name, SectionKind K) { MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + // We don't support explict section names for functions in the wasm object + // format. Each function has to be in its own unique section. 
+ if (isa<Function>(GO)) { + return SelectSectionForGlobal(GO, Kind, TM); + } + StringRef Name = GO->getSection(); - checkWasmComdat(GO); + Kind = getWasmKindForNamedSection(Name, Kind); - return getContext().getWasmSection(Name, Kind); + + StringRef Group = ""; + if (const Comdat *C = getWasmComdat(GO)) { + Group = C->getName(); + } + + return getContext().getWasmSection(Name, Kind, Group, + MCContext::GenericSectionID); } static MCSectionWasm *selectWasmSectionForGlobal( MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang, const TargetMachine &TM, bool EmitUniqueSection, unsigned *NextUniqueID) { StringRef Group = ""; - checkWasmComdat(GO); + if (const Comdat *C = getWasmComdat(GO)) { + Group = C->getName(); + } bool UniqueSectionNames = TM.getUniqueSectionNames(); SmallString<128> Name = getSectionPrefixForGlobal(Kind); diff --git a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp index 3e6ad3eeef0f..3fca2f4ee4fe 100644 --- a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -41,6 +41,7 @@ #include "llvm/Support/Threading.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include <cassert> #include <string> @@ -80,6 +81,9 @@ static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", cl::desc("Disable Machine LICM")); static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden, cl::desc("Disable Machine Sinking")); +static cl::opt<bool> DisablePostRAMachineSink("disable-postra-machine-sink", + cl::Hidden, + cl::desc("Disable PostRA Machine Sinking")); static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden, cl::desc("Disable Loop Strength Reduction Pass")); static cl::opt<bool> DisableConstantHoisting("disable-constant-hoisting", @@ -94,10 +98,9 @@ static cl::opt<bool> EnableImplicitNullChecks( "enable-implicit-null-checks", cl::desc("Fold null checks into faulting memory operations"), cl::init(false), cl::Hidden); -static cl::opt<bool> - EnableMergeICmps("enable-mergeicmps", - cl::desc("Merge ICmp chains into a single memcmp"), - cl::init(false), cl::Hidden); +static cl::opt<bool> DisableMergeICmps("disable-mergeicmps", + cl::desc("Disable MergeICmps Pass"), + cl::init(false), cl::Hidden); static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden, cl::desc("Print LLVM IR produced by the loop-reduce pass")); static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden, @@ -108,14 +111,16 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify generated machine code"), cl::init(false), cl::ZeroOrMore); -static cl::opt<bool> EnableMachineOutliner("enable-machine-outliner", - cl::Hidden, - cl::desc("Enable machine outliner")); -static cl::opt<bool> EnableLinkOnceODROutlining( - "enable-linkonceodr-outlining", - cl::Hidden, - cl::desc("Enable the machine outliner on linkonceodr functions"), - cl::init(false)); +enum RunOutliner { AlwaysOutline, NeverOutline, TargetDefault }; +// Enable or disable the MachineOutliner. 
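// A hedged usage sketch for the reworked option defined just below; the value
// names are taken from the clEnumValN entries:
//
//   llc -enable-machine-outliner=always   // outline in every function where
//                                         // it is known to be beneficial
//   llc -enable-machine-outliner=never    // disable outlining entirely
//   llc -enable-machine-outliner          // bare form; the "" sentinel also
//                                         // maps to AlwaysOutline
//
// Leaving the flag off keeps TargetDefault, which the later gating code
// combines with TM->Options.EnableMachineOutliner and SupportsDefaultOutlining.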
+static cl::opt<RunOutliner> EnableMachineOutliner( + "enable-machine-outliner", cl::desc("Enable the machine outliner"), + cl::Hidden, cl::ValueOptional, cl::init(TargetDefault), + cl::values(clEnumValN(AlwaysOutline, "always", + "Run on all functions guaranteed to be beneficial"), + clEnumValN(NeverOutline, "never", "Disable all outlining"), + // Sentinel value for unspecified option. + clEnumValN(AlwaysOutline, "", ""))); // Enable or disable FastISel. Both options are needed, because // FastISel is enabled by default with -fast, and we wish to be // able to enable or disable fast-isel independently from -O0. @@ -123,9 +128,9 @@ static cl::opt<cl::boolOrDefault> EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); -static cl::opt<cl::boolOrDefault> - EnableGlobalISel("global-isel", cl::Hidden, - cl::desc("Enable the \"global\" instruction selector")); +static cl::opt<cl::boolOrDefault> EnableGlobalISelOption( + "global-isel", cl::Hidden, + cl::desc("Enable the \"global\" instruction selector")); static cl::opt<std::string> PrintMachineInstrs( "print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), @@ -226,7 +231,7 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID, if (StandardID == &TailDuplicateID) return applyDisable(TargetID, DisableTailDuplicate); - if (StandardID == &TargetPassConfig::EarlyTailDuplicateID) + if (StandardID == &EarlyTailDuplicateID) return applyDisable(TargetID, DisableEarlyTailDup); if (StandardID == &MachineBlockPlacementID) @@ -241,18 +246,21 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID, if (StandardID == &EarlyIfConverterID) return applyDisable(TargetID, DisableEarlyIfConversion); - if (StandardID == &MachineLICMID) + if (StandardID == &EarlyMachineLICMID) return applyDisable(TargetID, DisableMachineLICM); if (StandardID == &MachineCSEID) return applyDisable(TargetID, DisableMachineCSE); - if (StandardID == &TargetPassConfig::PostRAMachineLICMID) + if (StandardID == &MachineLICMID) return applyDisable(TargetID, DisablePostRAMachineLICM); if (StandardID == &MachineSinkingID) return applyDisable(TargetID, DisableMachineSink); + if (StandardID == &PostRAMachineSinkingID) + return applyDisable(TargetID, DisablePostRAMachineSink); + if (StandardID == &MachineCopyPropagationID) return applyDisable(TargetID, DisableCopyProp); @@ -267,10 +275,6 @@ INITIALIZE_PASS(TargetPassConfig, "targetpassconfig", "Target Pass Configuration", false, false) char TargetPassConfig::ID = 0; -// Pseudo Pass IDs. -char TargetPassConfig::EarlyTailDuplicateID = 0; -char TargetPassConfig::PostRAMachineLICMID = 0; - namespace { struct InsertedPass { @@ -366,10 +370,6 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm) initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry()); initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); - // Substitute Pseudo Pass IDs for real ones. - substitutePass(&EarlyTailDuplicateID, &TailDuplicateID); - substitutePass(&PostRAMachineLICMID, &MachineLICMID); - if (StringRef(PrintMachineInstrs.getValue()).equals("")) TM.Options.PrintMachineCode = true; @@ -604,7 +604,7 @@ void TargetPassConfig::addIRPasses() { // loads and compares. ExpandMemCmpPass then tries to expand those calls // into optimally-sized loads and compares. The transforms are enabled by a // target lowering hook. 
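// A small concrete illustration of the transforms described in the comment
// above (the function below is hypothetical and not part of this change):
// MergeICmps can turn a chain of adjacent equality compares into one memcmp
// call, which ExpandMemCmpPass may then lower back into optimally-sized loads
// and compares for the target. With this change the pass is on by default and
// opted out of with -disable-mergeicmps instead of opted into.
//
//   struct Pair { int a; int b; };
//   bool eq(const Pair &x, const Pair &y) {
//     // Roughly becomes: memcmp(&x, &y, sizeof(Pair)) == 0
//     return x.a == y.a && x.b == y.b;
//   }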
- if (EnableMergeICmps) + if (!DisableMergeICmps) addPass(createMergeICmpsPass()); addPass(createExpandMemCmpPass()); } @@ -662,6 +662,14 @@ void TargetPassConfig::addPassesToHandleExceptions() { addPass(createWinEHPass()); addPass(createDwarfEHPass()); break; + case ExceptionHandling::Wasm: + // Wasm EH uses Windows EH instructions, but it does not need to demote PHIs + // on catchpads and cleanuppads because it does not outline them into + // funclets. Catchswitch blocks are not lowered in SelectionDAG, so we + // should remove PHIs there. + addPass(createWinEHPass(/*DemoteCatchSwitchPHIOnly=*/false)); + addPass(createWasmEHPass()); + break; case ExceptionHandling::None: addPass(createLowerInvokePass()); @@ -704,19 +712,18 @@ void TargetPassConfig::addISelPrepare() { } bool TargetPassConfig::addCoreISelPasses() { - // Enable FastISel with -fast, but allow that to be overridden. + // Enable FastISel with -fast-isel, but allow that to be overridden. TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE); if (EnableFastISelOption == cl::BOU_TRUE || (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel())) TM->setFastISel(true); - // Ask the target for an isel. - // Enable GlobalISel if the target wants to, but allow that to be overriden. + // Ask the target for an instruction selector. // Explicitly enabling fast-isel should override implicitly enabled // global-isel. - if (EnableGlobalISel == cl::BOU_TRUE || - (EnableGlobalISel == cl::BOU_UNSET && isGlobalISelEnabled() && - EnableFastISelOption != cl::BOU_TRUE)) { + if (EnableGlobalISelOption == cl::BOU_TRUE || + (EnableGlobalISelOption == cl::BOU_UNSET && + TM->Options.EnableGlobalISel && EnableFastISelOption != cl::BOU_TRUE)) { TM->setFastISel(false); if (addIRTranslator()) @@ -755,7 +762,7 @@ bool TargetPassConfig::addCoreISelPasses() { } bool TargetPassConfig::addISelPasses() { - if (TM->Options.EmulatedTLS) + if (TM->useEmulatedTLS()) addPass(createLowerEmuTLSPass()); addPass(createPreISelIntrinsicLoweringPass()); @@ -844,8 +851,10 @@ void TargetPassConfig::addMachinePasses() { addPostRegAlloc(); // Insert prolog/epilog code. Eliminate abstract frame index references... - if (getOptLevel() != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) { + addPass(&PostRAMachineSinkingID); addPass(&ShrinkWrapID); + } // Prolog/Epilog inserter needs a TargetMachine to instantiate. But only // do so if it hasn't been disabled, substituted, or overridden. @@ -904,8 +913,14 @@ void TargetPassConfig::addMachinePasses() { addPass(&XRayInstrumentationID, false); addPass(&PatchableFunctionID, false); - if (EnableMachineOutliner) - PM->add(createMachineOutlinerPass(EnableLinkOnceODROutlining)); + if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None && + EnableMachineOutliner != NeverOutline) { + bool RunOnAllFunctions = (EnableMachineOutliner == AlwaysOutline); + bool AddOutliner = RunOnAllFunctions || + TM->Options.SupportsDefaultOutlining; + if (AddOutliner) + addPass(createMachineOutlinerPass(RunOnAllFunctions)); + } // Add passes that directly emit MI after all other MI passes. addPreEmitPass2(); @@ -941,7 +956,7 @@ void TargetPassConfig::addMachineSSAOptimization() { // loop info, just like LICM and CSE below. addILPOpts(); - addPass(&MachineLICMID, false); + addPass(&EarlyMachineLICMID, false); addPass(&MachineCSEID, false); addPass(&MachineSinkingID); @@ -1090,10 +1105,14 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { // kill markers. 
addPass(&StackSlotColoringID); + // Copy propagate to forward register uses and try to eliminate COPYs that + // were not coalesced. + addPass(&MachineCopyPropagationID); + // Run post-ra machine LICM to hoist reloads / remats. // // FIXME: can this move into MachineLateOptimization? - addPass(&PostRAMachineLICMID); + addPass(&MachineLICMID); } } @@ -1135,18 +1154,13 @@ void TargetPassConfig::addBlockPlacement() { //===---------------------------------------------------------------------===// /// GlobalISel Configuration //===---------------------------------------------------------------------===// - -bool TargetPassConfig::isGlobalISelEnabled() const { - return false; -} - bool TargetPassConfig::isGlobalISelAbortEnabled() const { if (EnableGlobalISelAbort.getNumOccurrences() > 0) return EnableGlobalISelAbort == 1; // When no abort behaviour is specified, we don't abort if the target says // that GISel is enabled. - return !isGlobalISelEnabled(); + return !TM->Options.EnableGlobalISel; } bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const { diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp index f03c3b8300f3..661dc18f7a85 100644 --- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -19,15 +19,16 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" @@ -86,18 +87,24 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet, namespace llvm { Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI, - unsigned SubIdx) { - return Printable([Reg, TRI, SubIdx](raw_ostream &OS) { + unsigned SubIdx, const MachineRegisterInfo *MRI) { + return Printable([Reg, TRI, SubIdx, MRI](raw_ostream &OS) { if (!Reg) - OS << "%noreg"; + OS << "$noreg"; else if (TargetRegisterInfo::isStackSlot(Reg)) OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); - else if (TargetRegisterInfo::isVirtualRegister(Reg)) - OS << '%' << TargetRegisterInfo::virtReg2Index(Reg); + else if (TargetRegisterInfo::isVirtualRegister(Reg)) { + StringRef Name = MRI ? MRI->getVRegName(Reg) : ""; + if (Name != "") { + OS << '%' << Name; + } else { + OS << '%' << TargetRegisterInfo::virtReg2Index(Reg); + } + } else if (!TRI) - OS << '%' << "physreg" << Reg; + OS << '$' << "physreg" << Reg; else if (Reg < TRI->getNumRegs()) { - OS << '%'; + OS << '$'; printLowerCase(TRI->getName(Reg), OS); } else llvm_unreachable("Register kind is unsupported."); @@ -338,7 +345,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, return BestRC; } -/// \brief Check if the registers defined by the pair (RegisterClass, SubReg) +/// Check if the registers defined by the pair (RegisterClass, SubReg) /// share the same register file. 
static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, const TargetRegisterClass *DefRC, @@ -436,7 +443,8 @@ bool TargetRegisterInfo::needsStackRealignment( if (F.hasFnAttribute("stackrealign") || requiresRealignment) { if (canRealignStack(MF)) return true; - DEBUG(dbgs() << "Can't realign function's stack: " << F.getName() << "\n"); + LLVM_DEBUG(dbgs() << "Can't realign function's stack: " << F.getName() + << "\n"); } return false; } @@ -450,6 +458,51 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0, return true; } +unsigned TargetRegisterInfo::getRegSizeInBits(unsigned Reg, + const MachineRegisterInfo &MRI) const { + const TargetRegisterClass *RC{}; + if (isPhysicalRegister(Reg)) { + // The size is not directly available for physical registers. + // Instead, we need to access a register class that contains Reg and + // get the size of that register class. + RC = getMinimalPhysRegClass(Reg); + } else { + LLT Ty = MRI.getType(Reg); + unsigned RegSize = Ty.isValid() ? Ty.getSizeInBits() : 0; + // If Reg is not a generic register, query the register class to + // get its size. + if (RegSize) + return RegSize; + // Since Reg is not a generic register, it must have a register class. + RC = MRI.getRegClass(Reg); + } + assert(RC && "Unable to deduce the register class"); + return getRegSizeInBits(*RC); +} + +unsigned +TargetRegisterInfo::lookThruCopyLike(unsigned SrcReg, + const MachineRegisterInfo *MRI) const { + while (true) { + const MachineInstr *MI = MRI->getVRegDef(SrcReg); + if (!MI->isCopyLike()) + return SrcReg; + + unsigned CopySrcReg; + if (MI->isCopy()) + CopySrcReg = MI->getOperand(1).getReg(); + else { + assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike"); + CopySrcReg = MI->getOperand(2).getReg(); + } + + if (!isVirtualRegister(CopySrcReg)) + return CopySrcReg; + + SrcReg = CopySrcReg; + } +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex, diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp index 86dbf1b2aeab..3cff31ad4933 100644 --- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp @@ -61,12 +61,10 @@ static unsigned lcm(unsigned A, unsigned B) { return LCM; } -void TargetSchedModel::init(const MCSchedModel &sm, - const TargetSubtargetInfo *sti, - const TargetInstrInfo *tii) { - SchedModel = sm; - STI = sti; - TII = tii; +void TargetSchedModel::init(const TargetSubtargetInfo *TSInfo) { + STI = TSInfo; + SchedModel = TSInfo->getSchedModel(); + TII = TSInfo->getInstrInfo(); STI->initInstrItins(InstrItins); unsigned NumRes = SchedModel.getNumProcResourceKinds(); @@ -257,31 +255,19 @@ unsigned TargetSchedModel::computeOperandLatency( unsigned TargetSchedModel::computeInstrLatency(const MCSchedClassDesc &SCDesc) const { - unsigned Latency = 0; - for (unsigned DefIdx = 0, DefEnd = SCDesc.NumWriteLatencyEntries; - DefIdx != DefEnd; ++DefIdx) { - // Lookup the definition's write latency in SubtargetInfo. 
- const MCWriteLatencyEntry *WLEntry = - STI->getWriteLatencyEntry(&SCDesc, DefIdx); - Latency = std::max(Latency, capLatency(WLEntry->Cycles)); - } - return Latency; + return capLatency(MCSchedModel::computeInstrLatency(*STI, SCDesc)); } unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const { assert(hasInstrSchedModel() && "Only call this function with a SchedModel"); - unsigned SCIdx = TII->get(Opcode).getSchedClass(); - const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCIdx); - - if (SCDesc->isValid() && !SCDesc->isVariant()) - return computeInstrLatency(*SCDesc); + return capLatency(SchedModel.computeInstrLatency(*STI, SCIdx)); +} - if (SCDesc->isValid()) { - assert (!SCDesc->isVariant() && "No MI sched latency: SCDesc->isVariant()"); - return computeInstrLatency(*SCDesc); - } - return 0; +unsigned TargetSchedModel::computeInstrLatency(const MCInst &Inst) const { + if (hasInstrSchedModel()) + return capLatency(SchedModel.computeInstrLatency(*STI, *TII, Inst)); + return computeInstrLatency(Inst.getOpcode()); } unsigned @@ -336,71 +322,39 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, return 0; } -static Optional<double> -getRThroughputFromItineraries(unsigned schedClass, - const InstrItineraryData *IID){ - Optional<double> Throughput; - - for (const InstrStage *IS = IID->beginStage(schedClass), - *E = IID->endStage(schedClass); - IS != E; ++IS) { - if (IS->getCycles()) { - double Temp = countPopulation(IS->getUnits()) * 1.0 / IS->getCycles(); - Throughput = Throughput.hasValue() - ? std::min(Throughput.getValue(), Temp) - : Temp; - } - } - if (Throughput.hasValue()) - // We need reciprocal throughput that's why we return such value. - return 1 / Throughput.getValue(); - return Throughput; -} - -static Optional<double> -getRThroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc, - const TargetSubtargetInfo *STI, - const MCSchedModel &SchedModel) { - Optional<double> Throughput; - - for (const MCWriteProcResEntry *WPR = STI->getWriteProcResBegin(SCDesc), - *WEnd = STI->getWriteProcResEnd(SCDesc); - WPR != WEnd; ++WPR) { - if (WPR->Cycles) { - unsigned NumUnits = - SchedModel.getProcResource(WPR->ProcResourceIdx)->NumUnits; - double Temp = NumUnits * 1.0 / WPR->Cycles; - Throughput = Throughput.hasValue() - ? std::min(Throughput.getValue(), Temp) - : Temp; - } +double +TargetSchedModel::computeReciprocalThroughput(const MachineInstr *MI) const { + if (hasInstrItineraries()) { + unsigned SchedClass = MI->getDesc().getSchedClass(); + return MCSchedModel::getReciprocalThroughput(SchedClass, + *getInstrItineraries()); } - if (Throughput.hasValue()) - // We need reciprocal throughput that's why we return such value. 
- return 1 / Throughput.getValue(); - return Throughput; -} -Optional<double> -TargetSchedModel::computeInstrRThroughput(const MachineInstr *MI) const { - if (hasInstrItineraries()) - return getRThroughputFromItineraries(MI->getDesc().getSchedClass(), - getInstrItineraries()); if (hasInstrSchedModel()) - return getRThroughputFromInstrSchedModel(resolveSchedClass(MI), STI, - SchedModel); - return Optional<double>(); + return MCSchedModel::getReciprocalThroughput(*STI, *resolveSchedClass(MI)); + + return 0.0; } -Optional<double> -TargetSchedModel::computeInstrRThroughput(unsigned Opcode) const { +double +TargetSchedModel::computeReciprocalThroughput(unsigned Opcode) const { unsigned SchedClass = TII->get(Opcode).getSchedClass(); if (hasInstrItineraries()) - return getRThroughputFromItineraries(SchedClass, getInstrItineraries()); + return MCSchedModel::getReciprocalThroughput(SchedClass, + *getInstrItineraries()); if (hasInstrSchedModel()) { - const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass); - if (SCDesc->isValid() && !SCDesc->isVariant()) - return getRThroughputFromInstrSchedModel(SCDesc, STI, SchedModel); + const MCSchedClassDesc &SCDesc = *SchedModel.getSchedClassDesc(SchedClass); + if (SCDesc.isValid() && !SCDesc.isVariant()) + return MCSchedModel::getReciprocalThroughput(*STI, SCDesc); } - return Optional<double>(); + + return 0.0; } + +double +TargetSchedModel::computeReciprocalThroughput(const MCInst &MI) const { + if (hasInstrSchedModel()) + return SchedModel.getReciprocalThroughput(*STI, *TII, MI); + return computeReciprocalThroughput(MI.getOpcode()); +} + diff --git a/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp index 8693f344f9be..fa29c05fd6c2 100644 --- a/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp @@ -67,18 +67,15 @@ bool TargetSubtargetInfo::useAA() const { return false; } -static std::string createSchedInfoStr(unsigned Latency, - Optional<double> RThroughput) { +static std::string createSchedInfoStr(unsigned Latency, double RThroughput) { static const char *SchedPrefix = " sched: ["; std::string Comment; raw_string_ostream CS(Comment); - if (Latency > 0 && RThroughput.hasValue()) - CS << SchedPrefix << Latency << format(":%2.2f", RThroughput.getValue()) + if (RThroughput != 0.0) + CS << SchedPrefix << Latency << format(":%2.2f", RThroughput) << "]"; - else if (Latency > 0) + else CS << SchedPrefix << Latency << ":?]"; - else if (RThroughput.hasValue()) - CS << SchedPrefix << "?:" << RThroughput.getValue() << "]"; CS.flush(); return Comment; } @@ -90,9 +87,9 @@ std::string TargetSubtargetInfo::getSchedInfoStr(const MachineInstr &MI) const { // We don't cache TSchedModel because it depends on TargetInstrInfo // that could be changed during the compilation TargetSchedModel TSchedModel; - TSchedModel.init(getSchedModel(), this, getInstrInfo()); + TSchedModel.init(this); unsigned Latency = TSchedModel.computeInstrLatency(&MI); - Optional<double> RThroughput = TSchedModel.computeInstrRThroughput(&MI); + double RThroughput = TSchedModel.computeReciprocalThroughput(&MI); return createSchedInfoStr(Latency, RThroughput); } @@ -101,17 +98,19 @@ std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const { // We don't cache TSchedModel because it depends on TargetInstrInfo // that could be changed during the compilation TargetSchedModel TSchedModel; - TSchedModel.init(getSchedModel(), this, getInstrInfo()); + TSchedModel.init(this); 
unsigned Latency; if (TSchedModel.hasInstrSchedModel()) - Latency = TSchedModel.computeInstrLatency(MCI.getOpcode()); + Latency = TSchedModel.computeInstrLatency(MCI); else if (TSchedModel.hasInstrItineraries()) { auto *ItinData = TSchedModel.getInstrItineraries(); Latency = ItinData->getStageLatency( getInstrInfo()->get(MCI.getOpcode()).getSchedClass()); } else return std::string(); - Optional<double> RThroughput = - TSchedModel.computeInstrRThroughput(MCI.getOpcode()); + double RThroughput = TSchedModel.computeReciprocalThroughput(MCI); return createSchedInfoStr(Latency, RThroughput); } + +void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const { +} diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 774b76f84b7f..0ca435016ead 100644 --- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -290,8 +290,8 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, unsigned NumVisited = 0; for (MachineInstr &OtherMI : make_range(std::next(OldPos), KillPos)) { - // DBG_VALUE cannot be counted against the limit. - if (OtherMI.isDebugValue()) + // Debug instructions cannot be counted against the limit. + if (OtherMI.isDebugInstr()) continue; if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost. return false; @@ -685,15 +685,15 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, unsigned RegCIdx, unsigned Dist) { unsigned RegC = MI->getOperand(RegCIdx).getReg(); - DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); + LLVM_DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); MachineInstr *NewMI = TII->commuteInstruction(*MI, false, RegBIdx, RegCIdx); if (NewMI == nullptr) { - DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n"); + LLVM_DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n"); return false; } - DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI); + LLVM_DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI); assert(NewMI == MI && "TargetInstrInfo::commuteInstruction() should not return a new " "instruction unless it was requested."); @@ -740,8 +740,8 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, if (!NewMI) return false; - DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); - DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); + LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); + LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); bool Sunk = false; if (LIS) @@ -940,8 +940,8 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator KillPos = KillMI; ++KillPos; for (MachineInstr &OtherMI : make_range(End, KillPos)) { - // DBG_VALUE cannot be counted against the limit. - if (OtherMI.isDebugValue()) + // Debug instructions cannot be counted against the limit. + if (OtherMI.isDebugInstr()) continue; if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. return false; @@ -985,7 +985,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, } // Move debug info as well. 
- while (Begin != MBB->begin() && std::prev(Begin)->isDebugValue()) + while (Begin != MBB->begin() && std::prev(Begin)->isDebugInstr()) --Begin; nmi = End; @@ -1014,7 +1014,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, LV->addVirtualRegisterKilled(Reg, *MI); } - DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI); + LLVM_DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI); return true; } @@ -1114,8 +1114,8 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, unsigned NumVisited = 0; for (MachineInstr &OtherMI : make_range(mi, MachineBasicBlock::iterator(KillMI))) { - // DBG_VALUE cannot be counted against the limit. - if (OtherMI.isDebugValue()) + // Debug instructions cannot be counted against the limit. + if (OtherMI.isDebugInstr()) continue; if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. return false; @@ -1162,11 +1162,11 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, // Move the old kill above MI, don't forget to move debug info as well. MachineBasicBlock::iterator InsertPos = mi; - while (InsertPos != MBB->begin() && std::prev(InsertPos)->isDebugValue()) + while (InsertPos != MBB->begin() && std::prev(InsertPos)->isDebugInstr()) --InsertPos; MachineBasicBlock::iterator From = KillMI; MachineBasicBlock::iterator To = std::next(From); - while (std::prev(From)->isDebugValue()) + while (std::prev(From)->isDebugInstr()) --From; MBB->splice(InsertPos, MBB, From, To); @@ -1181,7 +1181,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, LV->addVirtualRegisterKilled(Reg, *MI); } - DEBUG(dbgs() << "\trescheduled kill: " << *KillMI); + LLVM_DEBUG(dbgs() << "\trescheduled kill: " << *KillMI); return true; } @@ -1205,6 +1205,7 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, if (!MI->isCommutable()) return false; + bool MadeChange = false; unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg(); unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg(); unsigned OpsNum = MI->getDesc().getNumOperands(); @@ -1223,8 +1224,8 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, // If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp // operands. This makes the live ranges of DstOp and OtherOp joinable. - bool DoCommute = - !BaseOpKilled && isKilled(*MI, OtherOpReg, MRI, TII, LIS, false); + bool OtherOpKilled = isKilled(*MI, OtherOpReg, MRI, TII, LIS, false); + bool DoCommute = !BaseOpKilled && OtherOpKilled; if (!DoCommute && isProfitableToCommute(DstOpReg, BaseOpReg, OtherOpReg, MI, Dist)) { @@ -1235,13 +1236,21 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, // If it's profitable to commute, try to do so. if (DoCommute && commuteInstruction(MI, DstOpIdx, BaseOpIdx, OtherOpIdx, Dist)) { + MadeChange = true; ++NumCommuted; - if (AggressiveCommute) + if (AggressiveCommute) { ++NumAggrCommuted; - return true; + // There might be more than two commutable operands, update BaseOp and + // continue scanning. + BaseOpReg = OtherOpReg; + BaseOpKilled = OtherOpKilled; + continue; + } + // If this was a commute based on kill, we won't do better continuing. + return MadeChange; } } - return false; + return MadeChange; } /// For the case where an instruction has a single pair of tied register @@ -1343,7 +1352,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, const MCInstrDesc &UnfoldMCID = TII->get(NewOpc); if (UnfoldMCID.getNumDefs() == 1) { // Unfold the load. 
- DEBUG(dbgs() << "2addr: UNFOLDING: " << MI); + LLVM_DEBUG(dbgs() << "2addr: UNFOLDING: " << MI); const TargetRegisterClass *RC = TRI->getAllocatableClass( TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF)); @@ -1352,7 +1361,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, if (!TII->unfoldMemoryOperand(*MF, MI, Reg, /*UnfoldLoad=*/true, /*UnfoldStore=*/false, NewMIs)) { - DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); return false; } assert(NewMIs.size() == 2 && @@ -1365,8 +1374,8 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, MBB->insert(mi, NewMIs[0]); MBB->insert(mi, NewMIs[1]); - DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0] - << "2addr: NEW INST: " << *NewMIs[1]); + LLVM_DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0] + << "2addr: NEW INST: " << *NewMIs[1]); // Transform the instruction, now that it no longer has a load. unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA); @@ -1431,7 +1440,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // Transforming didn't eliminate the tie and didn't lead to an // improvement. Clean up the unfolded instructions and keep the // original. - DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); NewMIs[0]->eraseFromParent(); NewMIs[1]->eraseFromParent(); } @@ -1475,7 +1484,7 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { MRI->constrainRegClass(DstReg, RC); SrcMO.setReg(DstReg); SrcMO.setSubReg(0); - DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI); + LLVM_DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI); continue; } TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx)); @@ -1574,7 +1583,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } } - DEBUG(dbgs() << "\t\tprepend:\t" << *MIB); + LLVM_DEBUG(dbgs() << "\t\tprepend:\t" << *MIB); MachineOperand &MO = MI->getOperand(SrcIdx); assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() && @@ -1668,9 +1677,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { bool MadeChange = false; - DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); - DEBUG(dbgs() << "********** Function: " - << MF->getName() << '\n'); + LLVM_DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); + LLVM_DEBUG(dbgs() << "********** Function: " << MF->getName() << '\n'); // This pass takes the function out of SSA form. MRI->leaveSSA(); @@ -1690,7 +1698,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { MachineBasicBlock::iterator nmi = std::next(mi); // Don't revisit an instruction previously converted by target. It may // contain undef register operands (%noreg), which are not handled. - if (mi->isDebugValue() || SunkInstrs.count(&*mi)) { + if (mi->isDebugInstr() || SunkInstrs.count(&*mi)) { mi = nmi; continue; } @@ -1713,7 +1721,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { ++NumTwoAddressInstrs; MadeChange = true; - DEBUG(dbgs() << '\t' << *mi); + LLVM_DEBUG(dbgs() << '\t' << *mi); // If the instruction has a single pair of tied operands, try some // transformations that may either eliminate the tied operands or @@ -1740,7 +1748,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { // Now iterate over the information collected above. 
for (auto &TO : TiedOperands) { processTiedPairs(&*mi, TO.second, Dist); - DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); + LLVM_DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); } // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. @@ -1754,7 +1762,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef()); mi->RemoveOperand(1); mi->setDesc(TII->get(TargetOpcode::COPY)); - DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + LLVM_DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); } // Clear TiedOperands here instead of at the top of the loop @@ -1787,7 +1795,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { if (MI.getOperand(0).getSubReg() || TargetRegisterInfo::isPhysicalRegister(DstReg) || !(MI.getNumOperands() & 1)) { - DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI); + LLVM_DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI); llvm_unreachable(nullptr); } @@ -1838,19 +1846,19 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg)) LV->replaceKillInstruction(SrcReg, MI, *CopyMI); - DEBUG(dbgs() << "Inserted: " << *CopyMI); + LLVM_DEBUG(dbgs() << "Inserted: " << *CopyMI); } MachineBasicBlock::iterator EndMBBI = std::next(MachineBasicBlock::iterator(MI)); if (!DefEmitted) { - DEBUG(dbgs() << "Turned: " << MI << " into an IMPLICIT_DEF"); + LLVM_DEBUG(dbgs() << "Turned: " << MI << " into an IMPLICIT_DEF"); MI.setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j) MI.RemoveOperand(j); } else { - DEBUG(dbgs() << "Eliminated: " << MI); + LLVM_DEBUG(dbgs() << "Eliminated: " << MI); MI.eraseFromParent(); } diff --git a/contrib/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm/lib/CodeGen/ValueTypes.cpp new file mode 100644 index 000000000000..adb7075de651 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ValueTypes.cpp @@ -0,0 +1,321 @@ +//===----------- ValueTypes.cpp - Implementation of EVT methods -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/ErrorHandling.h" +using namespace llvm; + +EVT EVT::changeExtendedTypeToInteger() const { + LLVMContext &Context = LLVMTy->getContext(); + return getIntegerVT(Context, getSizeInBits()); +} + +EVT EVT::changeExtendedVectorElementTypeToInteger() const { + LLVMContext &Context = LLVMTy->getContext(); + EVT IntTy = getIntegerVT(Context, getScalarSizeInBits()); + return getVectorVT(Context, IntTy, getVectorNumElements()); +} + +EVT EVT::getExtendedIntegerVT(LLVMContext &Context, unsigned BitWidth) { + EVT VT; + VT.LLVMTy = IntegerType::get(Context, BitWidth); + assert(VT.isExtended() && "Type is not extended!"); + return VT; +} + +EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, + unsigned NumElements) { + EVT ResultVT; + ResultVT.LLVMTy = VectorType::get(VT.getTypeForEVT(Context), NumElements); + assert(ResultVT.isExtended() && "Type is not extended!"); + return ResultVT; +} + +bool EVT::isExtendedFloatingPoint() const { + assert(isExtended() && "Type is not extended!"); + return LLVMTy->isFPOrFPVectorTy(); +} + +bool EVT::isExtendedInteger() const { + assert(isExtended() && "Type is not extended!"); + return LLVMTy->isIntOrIntVectorTy(); +} + +bool EVT::isExtendedScalarInteger() const { + assert(isExtended() && "Type is not extended!"); + return LLVMTy->isIntegerTy(); +} + +bool EVT::isExtendedVector() const { + assert(isExtended() && "Type is not extended!"); + return LLVMTy->isVectorTy(); +} + +bool EVT::isExtended16BitVector() const { + return isExtendedVector() && getExtendedSizeInBits() == 16; +} + +bool EVT::isExtended32BitVector() const { + return isExtendedVector() && getExtendedSizeInBits() == 32; +} + +bool EVT::isExtended64BitVector() const { + return isExtendedVector() && getExtendedSizeInBits() == 64; +} + +bool EVT::isExtended128BitVector() const { + return isExtendedVector() && getExtendedSizeInBits() == 128; +} + +bool EVT::isExtended256BitVector() const { + return isExtendedVector() && getExtendedSizeInBits() == 256; +} + +bool EVT::isExtended512BitVector() const { + return isExtendedVector() && getExtendedSizeInBits() == 512; +} + +bool EVT::isExtended1024BitVector() const { + return isExtendedVector() && getExtendedSizeInBits() == 1024; +} + +bool EVT::isExtended2048BitVector() const { + return isExtendedVector() && getExtendedSizeInBits() == 2048; +} + +EVT EVT::getExtendedVectorElementType() const { + assert(isExtended() && "Type is not extended!"); + return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType()); +} + +unsigned EVT::getExtendedVectorNumElements() const { + assert(isExtended() && "Type is not extended!"); + return cast<VectorType>(LLVMTy)->getNumElements(); +} + +unsigned EVT::getExtendedSizeInBits() const { + assert(isExtended() && "Type is not extended!"); + if (IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy)) + return ITy->getBitWidth(); + if (VectorType *VTy = dyn_cast<VectorType>(LLVMTy)) + return VTy->getBitWidth(); + llvm_unreachable("Unrecognized extended type!"); +} + +/// getEVTString - This function returns value type as a string, e.g. "i32". 
+std::string EVT::getEVTString() const { + switch (V.SimpleTy) { + default: + if (isVector()) + return "v" + utostr(getVectorNumElements()) + + getVectorElementType().getEVTString(); + if (isInteger()) + return "i" + utostr(getSizeInBits()); + llvm_unreachable("Invalid EVT!"); + case MVT::i1: return "i1"; + case MVT::i8: return "i8"; + case MVT::i16: return "i16"; + case MVT::i32: return "i32"; + case MVT::i64: return "i64"; + case MVT::i128: return "i128"; + case MVT::f16: return "f16"; + case MVT::f32: return "f32"; + case MVT::f64: return "f64"; + case MVT::f80: return "f80"; + case MVT::f128: return "f128"; + case MVT::ppcf128: return "ppcf128"; + case MVT::isVoid: return "isVoid"; + case MVT::Other: return "ch"; + case MVT::Glue: return "glue"; + case MVT::x86mmx: return "x86mmx"; + case MVT::v1i1: return "v1i1"; + case MVT::v2i1: return "v2i1"; + case MVT::v4i1: return "v4i1"; + case MVT::v8i1: return "v8i1"; + case MVT::v16i1: return "v16i1"; + case MVT::v32i1: return "v32i1"; + case MVT::v64i1: return "v64i1"; + case MVT::v128i1: return "v128i1"; + case MVT::v512i1: return "v512i1"; + case MVT::v1024i1: return "v1024i1"; + case MVT::v1i8: return "v1i8"; + case MVT::v2i8: return "v2i8"; + case MVT::v4i8: return "v4i8"; + case MVT::v8i8: return "v8i8"; + case MVT::v16i8: return "v16i8"; + case MVT::v32i8: return "v32i8"; + case MVT::v64i8: return "v64i8"; + case MVT::v128i8: return "v128i8"; + case MVT::v256i8: return "v256i8"; + case MVT::v1i16: return "v1i16"; + case MVT::v2i16: return "v2i16"; + case MVT::v4i16: return "v4i16"; + case MVT::v8i16: return "v8i16"; + case MVT::v16i16: return "v16i16"; + case MVT::v32i16: return "v32i16"; + case MVT::v64i16: return "v64i16"; + case MVT::v128i16: return "v128i16"; + case MVT::v1i32: return "v1i32"; + case MVT::v2i32: return "v2i32"; + case MVT::v4i32: return "v4i32"; + case MVT::v8i32: return "v8i32"; + case MVT::v16i32: return "v16i32"; + case MVT::v32i32: return "v32i32"; + case MVT::v64i32: return "v64i32"; + case MVT::v1i64: return "v1i64"; + case MVT::v2i64: return "v2i64"; + case MVT::v4i64: return "v4i64"; + case MVT::v8i64: return "v8i64"; + case MVT::v16i64: return "v16i64"; + case MVT::v32i64: return "v32i64"; + case MVT::v1i128: return "v1i128"; + case MVT::v1f32: return "v1f32"; + case MVT::v2f32: return "v2f32"; + case MVT::v2f16: return "v2f16"; + case MVT::v4f16: return "v4f16"; + case MVT::v8f16: return "v8f16"; + case MVT::v4f32: return "v4f32"; + case MVT::v8f32: return "v8f32"; + case MVT::v16f32: return "v16f32"; + case MVT::v1f64: return "v1f64"; + case MVT::v2f64: return "v2f64"; + case MVT::v4f64: return "v4f64"; + case MVT::v8f64: return "v8f64"; + case MVT::Metadata:return "Metadata"; + case MVT::Untyped: return "Untyped"; + case MVT::ExceptRef: return "ExceptRef"; + } +} + +/// getTypeForEVT - This method returns an LLVM type corresponding to the +/// specified EVT. For integer types, this returns an unsigned type. Note +/// that this will abort for types that cannot be represented. 
+Type *EVT::getTypeForEVT(LLVMContext &Context) const { + switch (V.SimpleTy) { + default: + assert(isExtended() && "Type is not extended!"); + return LLVMTy; + case MVT::isVoid: return Type::getVoidTy(Context); + case MVT::i1: return Type::getInt1Ty(Context); + case MVT::i8: return Type::getInt8Ty(Context); + case MVT::i16: return Type::getInt16Ty(Context); + case MVT::i32: return Type::getInt32Ty(Context); + case MVT::i64: return Type::getInt64Ty(Context); + case MVT::i128: return IntegerType::get(Context, 128); + case MVT::f16: return Type::getHalfTy(Context); + case MVT::f32: return Type::getFloatTy(Context); + case MVT::f64: return Type::getDoubleTy(Context); + case MVT::f80: return Type::getX86_FP80Ty(Context); + case MVT::f128: return Type::getFP128Ty(Context); + case MVT::ppcf128: return Type::getPPC_FP128Ty(Context); + case MVT::x86mmx: return Type::getX86_MMXTy(Context); + case MVT::v1i1: return VectorType::get(Type::getInt1Ty(Context), 1); + case MVT::v2i1: return VectorType::get(Type::getInt1Ty(Context), 2); + case MVT::v4i1: return VectorType::get(Type::getInt1Ty(Context), 4); + case MVT::v8i1: return VectorType::get(Type::getInt1Ty(Context), 8); + case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16); + case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32); + case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64); + case MVT::v128i1: return VectorType::get(Type::getInt1Ty(Context), 128); + case MVT::v512i1: return VectorType::get(Type::getInt1Ty(Context), 512); + case MVT::v1024i1: return VectorType::get(Type::getInt1Ty(Context), 1024); + case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1); + case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2); + case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4); + case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8); + case MVT::v16i8: return VectorType::get(Type::getInt8Ty(Context), 16); + case MVT::v32i8: return VectorType::get(Type::getInt8Ty(Context), 32); + case MVT::v64i8: return VectorType::get(Type::getInt8Ty(Context), 64); + case MVT::v128i8: return VectorType::get(Type::getInt8Ty(Context), 128); + case MVT::v256i8: return VectorType::get(Type::getInt8Ty(Context), 256); + case MVT::v1i16: return VectorType::get(Type::getInt16Ty(Context), 1); + case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2); + case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4); + case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8); + case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16); + case MVT::v32i16: return VectorType::get(Type::getInt16Ty(Context), 32); + case MVT::v64i16: return VectorType::get(Type::getInt16Ty(Context), 64); + case MVT::v128i16: return VectorType::get(Type::getInt16Ty(Context), 128); + case MVT::v1i32: return VectorType::get(Type::getInt32Ty(Context), 1); + case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2); + case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4); + case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8); + case MVT::v16i32: return VectorType::get(Type::getInt32Ty(Context), 16); + case MVT::v32i32: return VectorType::get(Type::getInt32Ty(Context), 32); + case MVT::v64i32: return VectorType::get(Type::getInt32Ty(Context), 64); + case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1); + case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2); + case MVT::v4i64: 
return VectorType::get(Type::getInt64Ty(Context), 4); + case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8); + case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16); + case MVT::v32i64: return VectorType::get(Type::getInt64Ty(Context), 32); + case MVT::v1i128: return VectorType::get(Type::getInt128Ty(Context), 1); + case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2); + case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4); + case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8); + case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1); + case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2); + case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4); + case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8); + case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16); + case MVT::v1f64: return VectorType::get(Type::getDoubleTy(Context), 1); + case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2); + case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4); + case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8); + case MVT::Metadata: return Type::getMetadataTy(Context); + } +} + +/// Return the value type corresponding to the specified type. This returns all +/// pointers as MVT::iPTR. If HandleUnknown is true, unknown types are returned +/// as Other, otherwise they are invalid. +MVT MVT::getVT(Type *Ty, bool HandleUnknown){ + switch (Ty->getTypeID()) { + default: + if (HandleUnknown) return MVT(MVT::Other); + llvm_unreachable("Unknown type!"); + case Type::VoidTyID: + return MVT::isVoid; + case Type::IntegerTyID: + return getIntegerVT(cast<IntegerType>(Ty)->getBitWidth()); + case Type::HalfTyID: return MVT(MVT::f16); + case Type::FloatTyID: return MVT(MVT::f32); + case Type::DoubleTyID: return MVT(MVT::f64); + case Type::X86_FP80TyID: return MVT(MVT::f80); + case Type::X86_MMXTyID: return MVT(MVT::x86mmx); + case Type::FP128TyID: return MVT(MVT::f128); + case Type::PPC_FP128TyID: return MVT(MVT::ppcf128); + case Type::PointerTyID: return MVT(MVT::iPTR); + case Type::VectorTyID: { + VectorType *VTy = cast<VectorType>(Ty); + return getVectorVT( + getVT(VTy->getElementType(), false), VTy->getNumElements()); + } + } +} + +/// getEVT - Return the value type corresponding to the specified type. This +/// returns all pointers as MVT::iPTR. If HandleUnknown is true, unknown types +/// are returned as Other, otherwise they are invalid. 
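The relationship between MVT::getVT(), EVT::getEVT(), and getTypeForEVT() is easiest to see as a round trip. A small editorial sketch (not part of the patch; only APIs defined in this file plus standard IR type factories):

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

void valueTypeRoundTrip() {
  LLVMContext Ctx;
  Type *V4F32 = VectorType::get(Type::getFloatTy(Ctx), 4);
  MVT M = MVT::getVT(V4F32);         // MVT::v4f32, a simple type
  EVT E = EVT::getEVT(V4F32);        // same type, wrapped in an EVT
  Type *Back = E.getTypeForEVT(Ctx); // the uniqued <4 x float> type again
  // Pointers have no fixed width at this level, so they map to iPTR.
  MVT P = MVT::getVT(Type::getInt8PtrTy(Ctx)); // MVT::iPTR
  // Types with no simple MVT (e.g. i24) become extended EVTs; getTypeForEVT()
  // then falls through to the stored LLVMTy instead of the switch.
  EVT Odd = EVT::getEVT(IntegerType::get(Ctx, 24));
  (void)M; (void)Back; (void)P; (void)Odd;
}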
+EVT EVT::getEVT(Type *Ty, bool HandleUnknown){ + switch (Ty->getTypeID()) { + default: + return MVT::getVT(Ty, HandleUnknown); + case Type::IntegerTyID: + return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth()); + case Type::VectorTyID: { + VectorType *VTy = cast<VectorType>(Ty); + return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false), + VTy->getNumElements()); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp index 13f7e83f3dd0..0ead2b8340ab 100644 --- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp @@ -35,6 +35,7 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" @@ -241,10 +242,9 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { Indexes = &getAnalysis<SlotIndexes>(); LIS = &getAnalysis<LiveIntervals>(); VRM = &getAnalysis<VirtRegMap>(); - DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" - << "********** Function: " - << MF->getName() << '\n'); - DEBUG(VRM->dump()); + LLVM_DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" + << "********** Function: " << MF->getName() << '\n'); + LLVM_DEBUG(VRM->dump()); // Add kill flags while we still have virtual registers. LIS->addKillFlags(VRM); @@ -376,7 +376,7 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const { void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const { if (!MI.isIdentityCopy()) return; - DEBUG(dbgs() << "Identity copy: " << MI); + LLVM_DEBUG(dbgs() << "Identity copy: " << MI); ++NumIdCopies; // Copies like: @@ -387,14 +387,14 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const { // instruction to maintain this information. if (MI.getOperand(0).isUndef() || MI.getNumOperands() > 2) { MI.setDesc(TII->get(TargetOpcode::KILL)); - DEBUG(dbgs() << " replace by: " << MI); + LLVM_DEBUG(dbgs() << " replace by: " << MI); return; } if (Indexes) Indexes->removeSingleMachineInstrFromMaps(MI); MI.eraseFromBundle(); - DEBUG(dbgs() << " deleted.\n"); + LLVM_DEBUG(dbgs() << " deleted.\n"); } /// The liverange splitting logic sometimes produces bundles of copies when @@ -406,6 +406,8 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const { return; if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) { + SmallVector<MachineInstr *, 2> MIs({&MI}); + // Only do this when the complete bundle is made out of COPYs. MachineBasicBlock &MBB = *MI.getParent(); for (MachineBasicBlock::reverse_instr_iterator I = @@ -413,16 +415,53 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const { I != E && I->isBundledWithSucc(); ++I) { if (!I->isCopy()) return; + MIs.push_back(&*I); + } + MachineInstr *FirstMI = MIs.back(); + + auto anyRegsAlias = [](const MachineInstr *Dst, + ArrayRef<MachineInstr *> Srcs, + const TargetRegisterInfo *TRI) { + for (const MachineInstr *Src : Srcs) + if (Src != Dst) + if (TRI->regsOverlap(Dst->getOperand(0).getReg(), + Src->getOperand(1).getReg())) + return true; + return false; + }; + + // If any of the destination registers in the bundle of copies alias any of + // the source registers, try to schedule the instructions to avoid any + // clobbering. 
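// Editorial illustration (not part of the patch): the copies in such a bundle
// are semantically parallel, so the order in which they get unbundled matters.
// With generic register names, a bundle like
//
//   BUNDLE
//     $r1 = COPY $r0
//     $r2 = COPY $r1
//
// must not be emitted in textual order, because $r1 would be overwritten
// before the second copy reads its old value. The loop below repeatedly picks
// a copy whose destination overlaps no remaining source ($r2 = COPY $r1 here)
// and schedules it ahead of the rest, so the clobbering copy ($r1 = COPY $r0)
// is emitted last. A true cycle such as
//
//   BUNDLE
//     $r0 = COPY $r1
//     $r1 = COPY $r0
//
// admits no such order and is reported via the "cycle in copy bundle" error.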
+ for (int E = MIs.size(), PrevE = E; E > 1; PrevE = E) { + for (int I = E; I--; ) + if (!anyRegsAlias(MIs[I], makeArrayRef(MIs).take_front(E), TRI)) { + if (I + 1 != E) + std::swap(MIs[I], MIs[E - 1]); + --E; + } + if (PrevE == E) { + MF->getFunction().getContext().emitError( + "register rewriting failed: cycle in copy bundle"); + break; + } } - for (MachineBasicBlock::reverse_instr_iterator I = MI.getReverseIterator(); - I->isBundledWithPred(); ) { - MachineInstr &MI = *I; - ++I; + MachineInstr *BundleStart = FirstMI; + for (MachineInstr *BundledMI : llvm::reverse(MIs)) { + // If instruction is in the middle of the bundle, move it before the + // bundle starts, otherwise, just unbundle it. When we get to the last + // instruction, the bundle will have been completely undone. + if (BundledMI != BundleStart) { + BundledMI->removeFromBundle(); + MBB.insert(FirstMI, BundledMI); + } else if (BundledMI->isBundledWithSucc()) { + BundledMI->unbundleFromSucc(); + BundleStart = &*std::next(BundledMI->getIterator()); + } - MI.unbundleFromPred(); - if (Indexes) - Indexes->insertMachineInstrInMaps(MI); + if (Indexes && BundledMI != FirstMI) + Indexes->insertMachineInstrInMaps(*BundledMI); } } } @@ -461,7 +500,7 @@ void VirtRegRewriter::rewrite() { for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { - DEBUG(MBBI->print(dbgs(), Indexes)); + LLVM_DEBUG(MBBI->print(dbgs(), Indexes)); for (MachineBasicBlock::instr_iterator MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) { MachineInstr *MI = &*MII; @@ -530,7 +569,7 @@ void VirtRegRewriter::rewrite() { // Rewrite. Note we could have used MachineOperand::substPhysReg(), but // we need the inlining here. MO.setReg(PhysReg); - MO.setIsRenamableIfNoExtraRegAllocReq(); + MO.setIsRenamable(true); } // Add any missing super-register kills after rewriting the whole @@ -544,7 +583,7 @@ void VirtRegRewriter::rewrite() { while (!SuperDefs.empty()) MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI); - DEBUG(dbgs() << "> " << *MI); + LLVM_DEBUG(dbgs() << "> " << *MI); expandCopyBundle(*MI); diff --git a/contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp new file mode 100644 index 000000000000..83d04da5dd0c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp @@ -0,0 +1,374 @@ +//===-- WasmEHPrepare - Prepare excepton handling for WebAssembly --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transformation is designed for use by code generators which use +// WebAssembly exception handling scheme. +// +// WebAssembly exception handling uses Windows exception IR for the middle level +// representation. This pass does the following transformation for every +// catchpad block: +// (In C-style pseudocode) +// +// - Before: +// catchpad ... +// exn = wasm.get.exception(); +// selector = wasm.get.selector(); +// ... +// +// - After: +// catchpad ... +// exn = wasm.catch(0); // 0 is a tag for C++ +// wasm.landingpad.index(index); +// // Only add below in case it's not a single catch (...) +// __wasm_lpad_context.lpad_index = index; +// __wasm_lpad_context.lsda = wasm.lsda(); +// _Unwind_CallPersonality(exn); +// int selector = __wasm.landingpad_context.selector; +// ... 
+//
+// Also, this pass does the following for a cleanuppad block with a call to
+// __clang_call_terminate():
+// - Before:
+//   cleanuppad ...
+//   exn = wasm.get.exception();
+//   __clang_call_terminate(exn);
+//
+// - After:
+//   cleanuppad ...
+//   exn = wasm.catch(0); // 0 is a tag for C++
+//   __clang_call_terminate(exn);
+//
+//
+// * Background: WebAssembly EH instructions
+// WebAssembly's try and catch instructions are structured as follows:
+// try
+//   instruction*
+// catch (C++ tag)
+//   instruction*
+// ...
+// catch_all
+//   instruction*
+// try_end
+//
+// A catch instruction in WebAssembly does not correspond to a C++ catch
+// clause. In WebAssembly, there is a single catch instruction for all C++
+// exceptions. There can be more catch instructions for exceptions in other
+// languages, but they are not generated for now. catch_all catches all
+// exceptions including foreign exceptions (e.g. JavaScript). We turn
+// catchpads into catch (C++ tag) and cleanuppads into catch_all, with one
+// exception: a cleanuppad with a call to __clang_call_terminate should appear
+// in both catch (C++ tag) and catch_all.
+//
+//
+// * Background: Direct personality function call
+// In WebAssembly EH, the VM is responsible for unwinding the stack once an
+// exception is thrown. After the stack is unwound, the control flow is
+// transferred to the WebAssembly 'catch' instruction, which returns a caught
+// exception object.
+//
+// Unwinding the stack is not done by libunwind but by the VM, so the
+// personality function in libcxxabi cannot be called from libunwind during
+// the unwinding process. So after a catch instruction, we insert a call to a
+// wrapper function in libunwind that in turn calls the real personality
+// function.
+//
+// In Itanium EH, if the personality function decides there is no matching
+// catch clause in a call frame and no cleanup action to perform, the unwinder
+// doesn't stop there and continues unwinding. But in Wasm EH, the unwinder
+// stops at every call frame with a catch instruction, after which the
+// personality function is called from the compiler-generated user code here.
+//
+// In libunwind, we have this struct that serves as a communication channel
+// between the compiler-generated user code and the personality function in
+// libcxxabi.
+//
+// struct _Unwind_LandingPadContext {
+//   uintptr_t lpad_index;
+//   uintptr_t lsda;
+//   uintptr_t selector;
+// };
+// struct _Unwind_LandingPadContext __wasm_lpad_context = ...;
+//
+// And this wrapper in libunwind calls the personality function.
+//
+// _Unwind_Reason_Code _Unwind_CallPersonality(void *exception_ptr) {
+//   struct _Unwind_Exception *exception_obj =
+//       (struct _Unwind_Exception *)exception_ptr;
+//   _Unwind_Reason_Code ret = __gxx_personality_v0(
+//       1, _UA_CLEANUP_PHASE, exception_obj->exception_class, exception_obj,
+//       (struct _Unwind_Context *)__wasm_lpad_context);
+//   return ret;
+// }
+//
+// We pass a landing pad index and the address of the LSDA for the current
+// function to the wrapper function _Unwind_CallPersonality in libunwind, and
+// we retrieve the selector after it returns.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/WasmEHFuncInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "wasmehprepare" + +namespace { +class WasmEHPrepare : public FunctionPass { + Type *LPadContextTy = nullptr; // type of 'struct _Unwind_LandingPadContext' + GlobalVariable *LPadContextGV = nullptr; // __wasm_lpad_context + + // Field addresses of struct _Unwind_LandingPadContext + Value *LPadIndexField = nullptr; // lpad_index field + Value *LSDAField = nullptr; // lsda field + Value *SelectorField = nullptr; // selector + + Function *CatchF = nullptr; // wasm.catch.extract() intrinsic + Function *LPadIndexF = nullptr; // wasm.landingpad.index() intrinsic + Function *LSDAF = nullptr; // wasm.lsda() intrinsic + Function *GetExnF = nullptr; // wasm.get.exception() intrinsic + Function *GetSelectorF = nullptr; // wasm.get.ehselector() intrinsic + Function *CallPersonalityF = nullptr; // _Unwind_CallPersonality() wrapper + Function *ClangCallTermF = nullptr; // __clang_call_terminate() function + + void prepareEHPad(BasicBlock *BB, unsigned Index); + void prepareTerminateCleanupPad(BasicBlock *BB); + +public: + static char ID; // Pass identification, replacement for typeid + + WasmEHPrepare() : FunctionPass(ID) {} + + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; + + StringRef getPassName() const override { + return "WebAssembly Exception handling preparation"; + } +}; +} // end anonymous namespace + +char WasmEHPrepare::ID = 0; +INITIALIZE_PASS(WasmEHPrepare, DEBUG_TYPE, "Prepare WebAssembly exceptions", + false, false) + +FunctionPass *llvm::createWasmEHPass() { return new WasmEHPrepare(); } + +bool WasmEHPrepare::doInitialization(Module &M) { + IRBuilder<> IRB(M.getContext()); + LPadContextTy = StructType::get(IRB.getInt32Ty(), // lpad_index + IRB.getInt8PtrTy(), // lsda + IRB.getInt32Ty() // selector + ); + return false; +} + +bool WasmEHPrepare::runOnFunction(Function &F) { + SmallVector<BasicBlock *, 16> CatchPads; + SmallVector<BasicBlock *, 16> CleanupPads; + for (BasicBlock &BB : F) { + if (!BB.isEHPad()) + continue; + auto *Pad = BB.getFirstNonPHI(); + if (isa<CatchPadInst>(Pad)) + CatchPads.push_back(&BB); + else if (isa<CleanupPadInst>(Pad)) + CleanupPads.push_back(&BB); + } + + if (CatchPads.empty() && CleanupPads.empty()) + return false; + assert(F.hasPersonalityFn() && "Personality function not found"); + + Module &M = *F.getParent(); + IRBuilder<> IRB(F.getContext()); + + // __wasm_lpad_context global variable + LPadContextGV = cast<GlobalVariable>( + M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy)); + LPadIndexField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 0, + "lpad_index_gep"); + LSDAField = + IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 1, "lsda_gep"); + SelectorField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 2, + "selector_gep"); + + // wasm.catch() intinsic, which will be lowered to wasm 'catch' instruction. 
+ CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch); + // wasm.landingpad.index() intrinsic, which is to specify landingpad index + LPadIndexF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_landingpad_index); + // wasm.lsda() intrinsic. Returns the address of LSDA table for the current + // function. + LSDAF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_lsda); + // wasm.get.exception() and wasm.get.ehselector() intrinsics. Calls to these + // are generated in clang. + GetExnF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_exception); + GetSelectorF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_ehselector); + + // _Unwind_CallPersonality() wrapper function, which calls the personality + CallPersonalityF = cast<Function>(M.getOrInsertFunction( + "_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy())); + CallPersonalityF->setDoesNotThrow(); + + // __clang_call_terminate() function, which is inserted by clang in case a + // cleanup throws + ClangCallTermF = M.getFunction("__clang_call_terminate"); + + unsigned Index = 0; + for (auto *BB : CatchPads) { + auto *CPI = cast<CatchPadInst>(BB->getFirstNonPHI()); + // In case of a single catch (...), we don't need to emit LSDA + if (CPI->getNumArgOperands() == 1 && + cast<Constant>(CPI->getArgOperand(0))->isNullValue()) + prepareEHPad(BB, -1); + else + prepareEHPad(BB, Index++); + } + + if (!ClangCallTermF) + return !CatchPads.empty(); + + // Cleanuppads will turn into catch_all later, but cleanuppads with a call to + // __clang_call_terminate() is a special case. __clang_call_terminate() takes + // an exception object, so we have to duplicate call in both 'catch <C++ tag>' + // and 'catch_all' clauses. Here we only insert a call to catch; the + // duplication will be done later. In catch_all, the exception object will be + // set to null. + for (auto *BB : CleanupPads) + for (auto &I : *BB) + if (auto *CI = dyn_cast<CallInst>(&I)) + if (CI->getCalledValue() == ClangCallTermF) + prepareEHPad(BB, -1); + + return true; +} + +void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) { + assert(BB->isEHPad() && "BB is not an EHPad!"); + IRBuilder<> IRB(BB->getContext()); + + IRB.SetInsertPoint(&*BB->getFirstInsertionPt()); + // The argument to wasm.catch() is the tag for C++ exceptions, which we set to + // 0 for this module. + // Pseudocode: void *exn = wasm.catch(0); + Instruction *Exn = IRB.CreateCall(CatchF, IRB.getInt32(0), "exn"); + // Replace the return value of wasm.get.exception() with the return value from + // wasm.catch(). + auto *FPI = cast<FuncletPadInst>(BB->getFirstNonPHI()); + Instruction *GetExnCI = nullptr, *GetSelectorCI = nullptr; + for (auto &U : FPI->uses()) { + if (auto *CI = dyn_cast<CallInst>(U.getUser())) { + if (CI->getCalledValue() == GetExnF) + GetExnCI = CI; + else if (CI->getCalledValue() == GetSelectorF) + GetSelectorCI = CI; + } + } + + assert(GetExnCI && "wasm.get.exception() call does not exist"); + GetExnCI->replaceAllUsesWith(Exn); + GetExnCI->eraseFromParent(); + + // In case it is a catchpad with single catch (...) or a cleanuppad, we don't + // need to call personality function because we don't need a selector. 
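// Editorial note (not part of the patch): the shape matched directly below
// is, for a catch-all, a catchpad whose only argument is a null typeinfo
// pointer -- roughly what clang emits for `catch (...)`, e.g.
//
//   %cp = catchpad within %cs [i8* null]
//
// (the exact operand form is an illustrative assumption; the pass relies only
// on the "single null constant operand" property). Pads like these reach this
// function with Index == -1 and return early here, skipping the landingpad
// index, the LSDA store, and the _Unwind_CallPersonality call, because no
// selector will ever be read.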
+ if (FPI->getNumArgOperands() == 0 || + (FPI->getNumArgOperands() == 1 && + cast<Constant>(FPI->getArgOperand(0))->isNullValue())) { + if (GetSelectorCI) { + assert(GetSelectorCI->use_empty() && + "wasm.get.ehselector() still has uses!"); + GetSelectorCI->eraseFromParent(); + } + return; + } + IRB.SetInsertPoint(Exn->getNextNode()); + + // This is to create a map of <landingpad EH label, landingpad index> in + // SelectionDAGISel, which is to be used in EHStreamer to emit LSDA tables. + // Pseudocode: wasm.landingpad.index(Index); + IRB.CreateCall(LPadIndexF, IRB.getInt32(Index)); + + // Pseudocode: __wasm_lpad_context.lpad_index = index; + IRB.CreateStore(IRB.getInt32(Index), LPadIndexField); + + // Store LSDA address only if this catchpad belongs to a top-level + // catchswitch. If there is another catchpad that dominates this pad, we don't + // need to store LSDA address again, because they are the same throughout the + // function and have been already stored before. + // TODO Can we not store LSDA address in user function but make libcxxabi + // compute it? + auto *CPI = cast<CatchPadInst>(FPI); + if (isa<ConstantTokenNone>(CPI->getCatchSwitch()->getParentPad())) + // Pseudocode: __wasm_lpad_context.lsda = wasm.lsda(); + IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField); + + // Pseudocode: _Unwind_CallPersonality(exn); + CallInst *PersCI = + IRB.CreateCall(CallPersonalityF, Exn, OperandBundleDef("funclet", CPI)); + PersCI->setDoesNotThrow(); + + // Pseudocode: int selector = __wasm.landingpad_context.selector; + Instruction *Selector = IRB.CreateLoad(SelectorField, "selector"); + + // Replace the return value from wasm.get.ehselector() with the selector value + // loaded from __wasm_lpad_context.selector. + assert(GetSelectorCI && "wasm.get.ehselector() call does not exist"); + GetSelectorCI->replaceAllUsesWith(Selector); + GetSelectorCI->eraseFromParent(); +} + +void llvm::calculateWasmEHInfo(const Function *F, WasmEHFuncInfo &EHInfo) { + for (const auto &BB : *F) { + if (!BB.isEHPad()) + continue; + const Instruction *Pad = BB.getFirstNonPHI(); + + // If an exception is not caught by a catchpad (i.e., it is a foreign + // exception), it will unwind to its parent catchswitch's unwind + // destination. We don't record an unwind destination for cleanuppads + // because every exception should be caught by it. + if (const auto *CatchPad = dyn_cast<CatchPadInst>(Pad)) { + const auto *UnwindBB = CatchPad->getCatchSwitch()->getUnwindDest(); + if (!UnwindBB) + continue; + const Instruction *UnwindPad = UnwindBB->getFirstNonPHI(); + if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UnwindPad)) + // Currently there should be only one handler per a catchswitch. + EHInfo.setEHPadUnwindDest(&BB, *CatchSwitch->handlers().begin()); + else // cleanuppad + EHInfo.setEHPadUnwindDest(&BB, UnwindBB); + } + } + + // Record the unwind destination for invoke and cleanupret instructions. + for (const auto &BB : *F) { + const Instruction *TI = BB.getTerminator(); + BasicBlock *UnwindBB = nullptr; + if (const auto *Invoke = dyn_cast<InvokeInst>(TI)) + UnwindBB = Invoke->getUnwindDest(); + else if (const auto *CleanupRet = dyn_cast<CleanupReturnInst>(TI)) + UnwindBB = CleanupRet->getUnwindDest(); + if (!UnwindBB) + continue; + const Instruction *UnwindPad = UnwindBB->getFirstNonPHI(); + if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UnwindPad)) + // Currently there should be only one handler per a catchswitch. 
+ EHInfo.setThrowUnwindDest(&BB, *CatchSwitch->handlers().begin()); + else // cleanuppad + EHInfo.setThrowUnwindDest(&BB, UnwindBB); + } +} diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp index 0b16a113640d..e629c13f133f 100644 --- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" @@ -31,7 +32,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; @@ -41,7 +41,7 @@ using namespace llvm; static cl::opt<bool> DisableDemotion( "disable-demotion", cl::Hidden, cl::desc( - "Clone multicolor basic blocks but do not demote cross funclet values"), + "Clone multicolor basic blocks but do not demote cross scopes"), cl::init(false)); static cl::opt<bool> DisableCleanups( @@ -49,12 +49,17 @@ static cl::opt<bool> DisableCleanups( cl::desc("Do not remove implausible terminators or other similar cleanups"), cl::init(false)); +static cl::opt<bool> DemoteCatchSwitchPHIOnlyOpt( + "demote-catchswitch-only", cl::Hidden, + cl::desc("Demote catchswitch BBs only (for wasm EH)"), cl::init(false)); + namespace { class WinEHPrepare : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid. - WinEHPrepare() : FunctionPass(ID) {} + WinEHPrepare(bool DemoteCatchSwitchPHIOnly = false) + : FunctionPass(ID), DemoteCatchSwitchPHIOnly(DemoteCatchSwitchPHIOnly) {} bool runOnFunction(Function &Fn) override; @@ -77,12 +82,14 @@ private: bool prepareExplicitEH(Function &F); void colorFunclets(Function &F); - void demotePHIsOnFunclets(Function &F); + void demotePHIsOnFunclets(Function &F, bool DemoteCatchSwitchPHIOnly); void cloneCommonBlocks(Function &F); void removeImplausibleInstructions(Function &F); void cleanupPreparedFunclets(Function &F); void verifyPreparedFunclets(Function &F); + bool DemoteCatchSwitchPHIOnly; + // All fields are reset by runOnFunction. EHPersonality Personality = EHPersonality::Unknown; @@ -97,7 +104,9 @@ char WinEHPrepare::ID = 0; INITIALIZE_PASS(WinEHPrepare, DEBUG_TYPE, "Prepare Windows exceptions", false, false) -FunctionPass *llvm::createWinEHPass() { return new WinEHPrepare(); } +FunctionPass *llvm::createWinEHPass(bool DemoteCatchSwitchPHIOnly) { + return new WinEHPrepare(DemoteCatchSwitchPHIOnly); +} bool WinEHPrepare::runOnFunction(Function &Fn) { if (!Fn.hasPersonalityFn()) @@ -106,8 +115,8 @@ bool WinEHPrepare::runOnFunction(Function &Fn) { // Classify the personality to see what kind of preparation we need. Personality = classifyEHPersonality(Fn.getPersonalityFn()); - // Do nothing if this is not a funclet-based personality. - if (!isFuncletEHPersonality(Personality)) + // Do nothing if this is not a scope-based personality. 
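// Editorial note (not part of the patch): the switch from
// isFuncletEHPersonality() to isScopedEHPersonality() widens the set of
// personalities this pass runs for. Assuming the usual definitions in
// EHPersonalities.h:
//   isFuncletEHPersonality(P): MSVC_X86SEH, MSVC_Win64SEH, MSVC_CXX, CoreCLR
//   isScopedEHPersonality(P):  all of the above, plus Wasm_CXX
// so WinEHPrepare now also prepares functions using the WebAssembly C++
// personality, typically in the demote-catchswitch-PHIs-only mode added above.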
+ if (!isScopedEHPersonality(Personality)) return false; DL = &Fn.getParent()->getDataLayout(); @@ -271,10 +280,11 @@ static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo, } int CatchHigh = FuncInfo.getLastStateNumber(); addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchHigh, Handlers); - DEBUG(dbgs() << "TryLow[" << BB->getName() << "]: " << TryLow << '\n'); - DEBUG(dbgs() << "TryHigh[" << BB->getName() << "]: " << TryHigh << '\n'); - DEBUG(dbgs() << "CatchHigh[" << BB->getName() << "]: " << CatchHigh - << '\n'); + LLVM_DEBUG(dbgs() << "TryLow[" << BB->getName() << "]: " << TryLow << '\n'); + LLVM_DEBUG(dbgs() << "TryHigh[" << BB->getName() << "]: " << TryHigh + << '\n'); + LLVM_DEBUG(dbgs() << "CatchHigh[" << BB->getName() << "]: " << CatchHigh + << '\n'); } else { auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI); @@ -285,8 +295,8 @@ static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo, int CleanupState = addUnwindMapEntry(FuncInfo, ParentState, BB); FuncInfo.EHPadStateMap[CleanupPad] = CleanupState; - DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB " - << BB->getName() << '\n'); + LLVM_DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB " + << BB->getName() << '\n'); for (const BasicBlock *PredBlock : predecessors(BB)) { if ((PredBlock = getEHPadFromPredecessor(PredBlock, CleanupPad->getParentPad()))) { @@ -351,8 +361,8 @@ static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo, // Everything in the __try block uses TryState as its parent state. FuncInfo.EHPadStateMap[CatchSwitch] = TryState; - DEBUG(dbgs() << "Assigning state #" << TryState << " to BB " - << CatchPadBB->getName() << '\n'); + LLVM_DEBUG(dbgs() << "Assigning state #" << TryState << " to BB " + << CatchPadBB->getName() << '\n'); for (const BasicBlock *PredBlock : predecessors(BB)) if ((PredBlock = getEHPadFromPredecessor(PredBlock, CatchSwitch->getParentPad()))) @@ -387,8 +397,8 @@ static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo, int CleanupState = addSEHFinally(FuncInfo, ParentState, BB); FuncInfo.EHPadStateMap[CleanupPad] = CleanupState; - DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB " - << BB->getName() << '\n'); + LLVM_DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB " + << BB->getName() << '\n'); for (const BasicBlock *PredBlock : predecessors(BB)) if ((PredBlock = getEHPadFromPredecessor(PredBlock, CleanupPad->getParentPad()))) @@ -677,13 +687,17 @@ void WinEHPrepare::colorFunclets(Function &F) { } } -void WinEHPrepare::demotePHIsOnFunclets(Function &F) { +void WinEHPrepare::demotePHIsOnFunclets(Function &F, + bool DemoteCatchSwitchPHIOnly) { // Strip PHI nodes off of EH pads. 
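// Editorial illustration (not part of the patch): demoting a PHI that sits on
// an EH pad means replacing its incoming SSA edges with stores to a stack
// slot in each predecessor and reloads at the uses. Conceptually,
//
//   dispatch:
//     %x = phi i32 [ %a, %left ], [ %b, %right ]
//     %cs = catchswitch within none [label %handler] unwind to caller
//
// becomes
//
//   entry:   %x.slot = alloca i32
//   left:    store i32 %a, i32* %x.slot    ; before branching to %dispatch
//   right:   store i32 %b, i32* %x.slot
//   ...      %x = load i32, i32* %x.slot   ; at each use of %x
//
// When DemoteCatchSwitchPHIOnly is set (the wasm EH configuration), only PHIs
// on catchswitch blocks are demoted; PHIs on other EH pads are left alone.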
SmallVector<PHINode *, 16> PHINodes; for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) { BasicBlock *BB = &*FI++; if (!BB->isEHPad()) continue; + if (DemoteCatchSwitchPHIOnly && !isa<CatchSwitchInst>(BB->getFirstNonPHI())) + continue; + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { Instruction *I = &*BI++; auto *PN = dyn_cast<PHINode>(I); @@ -1031,20 +1045,21 @@ bool WinEHPrepare::prepareExplicitEH(Function &F) { cloneCommonBlocks(F); if (!DisableDemotion) - demotePHIsOnFunclets(F); + demotePHIsOnFunclets(F, DemoteCatchSwitchPHIOnly || + DemoteCatchSwitchPHIOnlyOpt); if (!DisableCleanups) { - DEBUG(verifyFunction(F)); + LLVM_DEBUG(verifyFunction(F)); removeImplausibleInstructions(F); - DEBUG(verifyFunction(F)); + LLVM_DEBUG(verifyFunction(F)); cleanupPreparedFunclets(F); } - DEBUG(verifyPreparedFunclets(F)); + LLVM_DEBUG(verifyPreparedFunclets(F)); // Recolor the CFG to verify that all is well. - DEBUG(colorFunclets(F)); - DEBUG(verifyPreparedFunclets(F)); + LLVM_DEBUG(colorFunclets(F)); + LLVM_DEBUG(verifyPreparedFunclets(F)); BlockColors.clear(); FuncletBlocks.clear(); diff --git a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp index 3d83afcf1fc5..32a7457c2060 100644 --- a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp +++ b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp @@ -52,7 +52,6 @@ struct XRayInstrumentation : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); AU.addPreserved<MachineDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -160,11 +159,26 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { for (const auto &MBB : MF) MICount += MBB.size(); + // Get MachineDominatorTree or compute it on the fly if it's unavailable + auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); + MachineDominatorTree ComputedMDT; + if (!MDT) { + ComputedMDT.getBase().recalculate(MF); + MDT = &ComputedMDT; + } + + // Get MachineLoopInfo or compute it on the fly if it's unavailable + auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); + MachineLoopInfo ComputedMLI; + if (!MLI) { + ComputedMLI.getBase().analyze(MDT->getBase()); + MLI = &ComputedMLI; + } + // Check if we have a loop. // FIXME: Maybe make this smarter, and see whether the loops are dependent // on inputs or side-effects? - MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); - if (MLI.empty() && MICount < XRayThreshold) + if (MLI->empty() && MICount < XRayThreshold) return false; // Function is too small and has no loops. } |
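Taken together, the EH-related changes in this import are meant to be driven from a target's pass setup. The sketch below shows how a WebAssembly-style configuration could wire the two preparation passes together; the helper name, the pass-manager plumbing, and the exact ordering are illustrative assumptions, while createWinEHPass(bool) and createWasmEHPass() are the entry points added or changed above.

#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/LegacyPassManager.h"
using namespace llvm;

// Hypothetical helper: schedule the scoped-EH preparation passes the way a
// wasm target would plausibly want them.
static void addWasmExceptionPasses(legacy::PassManagerBase &PM) {
  // Demote only catchswitch-block PHIs, per the new WinEHPrepare constructor
  // argument (equivalently, the -demote-catchswitch-only flag).
  PM.add(createWinEHPass(/*DemoteCatchSwitchPHIOnly=*/true));
  // Then rewrite wasm.get.exception/wasm.get.ehselector into wasm.catch plus
  // the _Unwind_CallPersonality protocol, as implemented by WasmEHPrepare.
  PM.add(createWasmEHPass());
}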