author     Dimitry Andric <dim@FreeBSD.org>        2015-12-30 13:13:10 +0000
committer  Dimitry Andric <dim@FreeBSD.org>        2015-12-30 13:13:10 +0000
commit     7d523365ff1a3cc95bc058b33102500f61e8166d (patch)
tree       b466a4817f79516eb1df8eae92bccf62ecc84003 /contrib/llvm/lib/CodeGen
parent     e3b65fde506060bec5cd110fcf03b440bd0eea1d (diff)
parent     dd58ef019b700900793a1eb48b52123db01b654e (diff)
Update llvm to trunk r256633.
Notes:
svn path=/projects/clang380-import/; revision=292941
Diffstat (limited to 'contrib/llvm/lib/CodeGen')
158 files changed, 17505 insertions, 10454 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 5fe4c4bcaec4..4060db74a9b7 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -142,16 +142,15 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { assert(!State); State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); - bool IsReturnBlock = (!BB->empty() && BB->back().isReturn()); + bool IsReturnBlock = BB->isReturnBlock(); std::vector<unsigned> &KillIndices = State->GetKillIndices(); std::vector<unsigned> &DefIndices = State->GetDefIndices(); // Examine the live-in regs of all successors. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { - for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { + for (const auto &LI : (*SI)->liveins()) { + for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) { unsigned Reg = *AI; State->UnionGroups(Reg, 0); KillIndices[Reg] = BB->size(); @@ -365,9 +364,11 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers - // defined in a call must not be changed (ABI). + // defined in a call must not be changed (ABI). Inline assembly may + // reference either system calls or the register directly. Skip it until we + // can tell user specified registers from compiler-specified. if (MI->isCall() || MI->hasExtraDefRegAllocReq() || - TII->isPredicated(MI)) { + TII->isPredicated(MI) || MI->isInlineAsm()) { DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); State->UnionGroups(Reg, 0); } @@ -429,6 +430,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // If MI's uses have special allocation requirement, don't allow // any use registers to be changed. Also assume all registers // used in a call must not be changed (ABI). + // Inline Assembly register uses also cannot be safely changed. // FIXME: The issue with predicated instruction is more complex. We are being // conservatively here because the kill markers cannot be trusted after // if-conversion: @@ -444,7 +446,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // changed. bool Special = MI->isCall() || MI->hasExtraSrcRegAllocReq() || - TII->isPredicated(MI); + TII->isPredicated(MI) || MI->isInlineAsm(); // Scan the register uses for this instruction and update // live-ranges, groups and RegRefs. @@ -509,15 +511,8 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) { // Check all references that need rewriting for Reg. For each, use // the corresponding register class to narrow the set of registers // that are appropriate for renaming. 
- std::pair<std::multimap<unsigned, - AggressiveAntiDepState::RegisterReference>::iterator, - std::multimap<unsigned, - AggressiveAntiDepState::RegisterReference>::iterator> - Range = State->GetRegRefs().equal_range(Reg); - for (std::multimap<unsigned, - AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first, - QE = Range.second; Q != QE; ++Q) { - const TargetRegisterClass *RC = Q->second.RC; + for (const auto &Q : make_range(State->GetRegRefs().equal_range(Reg))) { + const TargetRegisterClass *RC = Q.second.RC; if (!RC) continue; BitVector RCBV = TRI->getAllocatableSet(MF, RC); @@ -685,9 +680,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // We cannot rename 'Reg' to 'NewReg' if one of the uses of 'Reg' also // defines 'NewReg' via an early-clobber operand. - auto Range = RegRefs.equal_range(Reg); - for (auto Q = Range.first, QE = Range.second; Q != QE; ++Q) { - auto UseMI = Q->second.Operand->getParent(); + for (const auto &Q : make_range(RegRefs.equal_range(Reg))) { + MachineInstr *UseMI = Q.second.Operand->getParent(); int Idx = UseMI->findRegisterDefOperandIdx(NewReg, false, true, TRI); if (Idx == -1) continue; @@ -698,6 +692,20 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( } } + // Also, we cannot rename 'Reg' to 'NewReg' if the instruction defining + // 'Reg' is an early-clobber define and that instruction also uses + // 'NewReg'. + for (const auto &Q : make_range(RegRefs.equal_range(Reg))) { + if (!Q.second.Operand->isDef() || !Q.second.Operand->isEarlyClobber()) + continue; + + MachineInstr *DefMI = Q.second.Operand->getParent(); + if (DefMI->readsRegister(NewReg, TRI)) { + DEBUG(dbgs() << "(ec)"); + goto next_super_reg; + } + } + // Record that 'Reg' can be renamed to 'NewReg'. RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg)); } @@ -920,23 +928,16 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Update the references to the old register CurrReg to // refer to the new register NewReg. - std::pair<std::multimap<unsigned, - AggressiveAntiDepState::RegisterReference>::iterator, - std::multimap<unsigned, - AggressiveAntiDepState::RegisterReference>::iterator> - Range = RegRefs.equal_range(CurrReg); - for (std::multimap<unsigned, - AggressiveAntiDepState::RegisterReference>::iterator - Q = Range.first, QE = Range.second; Q != QE; ++Q) { - Q->second.Operand->setReg(NewReg); + for (const auto &Q : make_range(RegRefs.equal_range(CurrReg))) { + Q.second.Operand->setReg(NewReg); // If the SU for the instruction being updated has debug // information related to the anti-dependency register, make // sure to update that as well. - const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()]; + const SUnit *SU = MISUnitMap[Q.second.Operand->getParent()]; if (!SU) continue; for (DbgValueVector::iterator DVI = DbgValues.begin(), DVE = DbgValues.end(); DVI != DVE; ++DVI) - if (DVI->second == Q->second.Operand->getParent()) + if (DVI->second == Q.second.Operand->getParent()) UpdateDbgValue(DVI->first, AntiDepReg, NewReg); } diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp index dc9bcff56121..40451c0d6c19 100644 --- a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp +++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp @@ -29,12 +29,13 @@ using namespace llvm; // Compare VirtRegMap::getRegAllocPref(). 
AllocationOrder::AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, - const RegisterClassInfo &RegClassInfo) + const RegisterClassInfo &RegClassInfo, + const LiveRegMatrix *Matrix) : Pos(0) { const MachineFunction &MF = VRM.getMachineFunction(); const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo(); Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg)); - TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM); + TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix); rewind(); DEBUG({ diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm/lib/CodeGen/AllocationOrder.h index 02b2d9250bc8..2aee3a63a2b1 100644 --- a/contrib/llvm/lib/CodeGen/AllocationOrder.h +++ b/contrib/llvm/lib/CodeGen/AllocationOrder.h @@ -24,6 +24,7 @@ namespace llvm { class RegisterClassInfo; class VirtRegMap; +class LiveRegMatrix; class LLVM_LIBRARY_VISIBILITY AllocationOrder { SmallVector<MCPhysReg, 16> Hints; @@ -37,7 +38,8 @@ public: /// @param RegClassInfo Information about reserved and allocatable registers. AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, - const RegisterClassInfo &RegClassInfo); + const RegisterClassInfo &RegClassInfo, + const LiveRegMatrix *Matrix); /// Get the allocation order without reordered hints. ArrayRef<MCPhysReg> getOrder() const { return Order; } diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index 98d4c8afc7b9..75579a2b4559 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -25,6 +26,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/GlobalStatus.h" @@ -515,7 +517,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { if (isa<DbgInfoIntrinsic>(BBI)) continue; if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || - !isSafeToSpeculativelyExecute(BBI)) + !isSafeToSpeculativelyExecute(&*BBI)) return false; } @@ -643,3 +645,97 @@ bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) { return !GS.IsCompared; } + +static void collectFuncletMembers( + DenseMap<const MachineBasicBlock *, int> &FuncletMembership, int Funclet, + const MachineBasicBlock *MBB) { + // Add this MBB to our funclet. + auto P = FuncletMembership.insert(std::make_pair(MBB, Funclet)); + + // Don't revisit blocks. + if (!P.second) { + assert(P.first->second == Funclet && "MBB is part of two funclets!"); + return; + } + + bool IsReturn = false; + int NumTerminators = 0; + for (const MachineInstr &MI : MBB->terminators()) { + IsReturn |= MI.isReturn(); + ++NumTerminators; + } + assert((!IsReturn || NumTerminators == 1) && + "Expected only one terminator when a return is present!"); + + // Returns are boundaries where funclet transfer can occur, don't follow + // successors. 
+ if (IsReturn) + return; + + for (const MachineBasicBlock *SMBB : MBB->successors()) + if (!SMBB->isEHPad()) + collectFuncletMembers(FuncletMembership, Funclet, SMBB); +} + +DenseMap<const MachineBasicBlock *, int> +llvm::getFuncletMembership(const MachineFunction &MF) { + DenseMap<const MachineBasicBlock *, int> FuncletMembership; + + // We don't have anything to do if there aren't any EH pads. + if (!MF.getMMI().hasEHFunclets()) + return FuncletMembership; + + int EntryBBNumber = MF.front().getNumber(); + bool IsSEH = isAsynchronousEHPersonality( + classifyEHPersonality(MF.getFunction()->getPersonalityFn())); + + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + SmallVector<const MachineBasicBlock *, 16> FuncletBlocks; + SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks; + SmallVector<const MachineBasicBlock *, 16> SEHCatchPads; + SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors; + for (const MachineBasicBlock &MBB : MF) { + if (MBB.isEHFuncletEntry()) { + FuncletBlocks.push_back(&MBB); + } else if (IsSEH && MBB.isEHPad()) { + SEHCatchPads.push_back(&MBB); + } else if (MBB.pred_empty()) { + UnreachableBlocks.push_back(&MBB); + } + + MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator(); + // CatchPads are not funclets for SEH so do not consider CatchRet to + // transfer control to another funclet. + if (MBBI->getOpcode() != TII->getCatchReturnOpcode()) + continue; + + // FIXME: SEH CatchPads are not necessarily in the parent function: + // they could be inside a finally block. + const MachineBasicBlock *Successor = MBBI->getOperand(0).getMBB(); + const MachineBasicBlock *SuccessorColor = MBBI->getOperand(1).getMBB(); + CatchRetSuccessors.push_back( + {Successor, IsSEH ? EntryBBNumber : SuccessorColor->getNumber()}); + } + + // We don't have anything to do if there aren't any EH pads. + if (FuncletBlocks.empty()) + return FuncletMembership; + + // Identify all the basic blocks reachable from the function entry. + collectFuncletMembers(FuncletMembership, EntryBBNumber, &MF.front()); + // All blocks not part of a funclet are in the parent function. + for (const MachineBasicBlock *MBB : UnreachableBlocks) + collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB); + // Next, identify all the blocks inside the funclets. + for (const MachineBasicBlock *MBB : FuncletBlocks) + collectFuncletMembers(FuncletMembership, MBB->getNumber(), MBB); + // SEH CatchPads aren't really funclets, handle them separately. + for (const MachineBasicBlock *MBB : SEHCatchPads) + collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB); + // Finally, identify all the targets of a catchret. 
+ for (std::pair<const MachineBasicBlock *, int> CatchRetPair : + CatchRetSuccessors) + collectFuncletMembers(FuncletMembership, CatchRetPair.second, + CatchRetPair.first); + return FuncletMembership; +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index 0bad7954b980..ade2d7105b88 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -73,7 +73,6 @@ void ARMException::endFunction(const MachineFunction *MF) { const Function *Per = nullptr; if (F->hasPersonalityFn()) Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); - assert(!MMI->getPersonality() || Per == MMI->getPersonality()); bool forceEmitPersonality = F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && F->needsUnwindTableEntry(); @@ -115,9 +114,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) { Entry = TypeInfos.size(); } - for (std::vector<const GlobalValue *>::const_reverse_iterator - I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { - const GlobalValue *GV = *I; + for (const GlobalValue *GV : reverse(TypeInfos)) { if (VerboseAsm) Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--)); Asm->EmitTTypeReference(GV, TTypeEncoding); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 125047e7bbb5..be7eafbeb83d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -135,11 +135,14 @@ const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { return *TM.getObjFileLowering(); } -/// getDataLayout - Return information about data layout. const DataLayout &AsmPrinter::getDataLayout() const { - return *TM.getDataLayout(); + return MMI->getModule()->getDataLayout(); } +// Do not use the cached DataLayout because some client use it without a Module +// (llmv-dsymutil, llvm-dwarfdump). +unsigned AsmPrinter::getPointerSize() const { return TM.getPointerSize(); } + const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const { assert(MF && "getSubtargetInfo requires a valid MachineFunction!"); return MF->getSubtarget<MCSubtargetInfo>(); @@ -193,10 +196,18 @@ bool AsmPrinter::doInitialization(Module &M) { unsigned Major, Minor, Update; TT.getOSVersion(Major, Minor, Update); // If there is a version specified, Major will be non-zero. - if (Major) - OutStreamer->EmitVersionMin((TT.isMacOSX() ? - MCVM_OSXVersionMin : MCVM_IOSVersionMin), - Major, Minor, Update); + if (Major) { + MCVersionMinType VersionType; + if (TT.isWatchOS()) + VersionType = MCVM_WatchOSVersionMin; + else if (TT.isTvOS()) + VersionType = MCVM_TvOSVersionMin; + else if (TT.isMacOSX()) + VersionType = MCVM_OSXVersionMin; + else + VersionType = MCVM_IOSVersionMin; + OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update); + } } // Allow the target to emit any magic that it wants at the start of the file. 
@@ -224,28 +235,20 @@ bool AsmPrinter::doInitialization(Module &M) { TM.getTargetFeatureString())); OutStreamer->AddComment("Start of file scope inline assembly"); OutStreamer->AddBlankLine(); - EmitInlineAsm(M.getModuleInlineAsm()+"\n", *STI, TM.Options.MCOptions); + EmitInlineAsm(M.getModuleInlineAsm()+"\n", + OutContext.getSubtargetCopy(*STI), TM.Options.MCOptions); OutStreamer->AddComment("End of file scope inline assembly"); OutStreamer->AddBlankLine(); } if (MAI->doesSupportDebugInformation()) { - bool skip_dwarf = false; - if (TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) { + bool EmitCodeView = MMI->getModule()->getCodeViewFlag(); + if (EmitCodeView && TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) { Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this), DbgTimerName, CodeViewLineTablesGroupName)); - // FIXME: Don't emit DWARF debug info if there's at least one function - // with AddressSanitizer instrumentation. - // This is a band-aid fix for PR22032. - for (auto &F : M.functions()) { - if (F.hasFnAttribute(Attribute::SanitizeAddress)) { - skip_dwarf = true; - break; - } - } } - if (!skip_dwarf) { + if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) { DD = new DwarfDebug(this, &M); Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName)); } @@ -340,8 +343,51 @@ MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const { return TM.getSymbol(GV, *Mang); } +static MCSymbol *getOrCreateEmuTLSControlSym(MCSymbol *GVSym, MCContext &C) { + return C.getOrCreateSymbol(Twine("__emutls_v.") + GVSym->getName()); +} + +static MCSymbol *getOrCreateEmuTLSInitSym(MCSymbol *GVSym, MCContext &C) { + return C.getOrCreateSymbol(Twine("__emutls_t.") + GVSym->getName()); +} + +/// EmitEmulatedTLSControlVariable - Emit the control variable for an emulated TLS variable. +void AsmPrinter::EmitEmulatedTLSControlVariable(const GlobalVariable *GV, + MCSymbol *EmittedSym, + bool AllZeroInitValue) { + MCSection *TLSVarSection = getObjFileLowering().getDataSection(); + OutStreamer->SwitchSection(TLSVarSection); + MCSymbol *GVSym = getSymbol(GV); + EmitLinkage(GV, EmittedSym); // same linkage as GV + const DataLayout &DL = GV->getParent()->getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); + unsigned AlignLog = getGVAlignmentLog2(GV, DL); + unsigned WordSize = DL.getPointerSize(); + unsigned Alignment = DL.getPointerABIAlignment(); + EmitAlignment(Log2_32(Alignment)); + OutStreamer->EmitLabel(EmittedSym); + OutStreamer->EmitIntValue(Size, WordSize); + OutStreamer->EmitIntValue((1 << AlignLog), WordSize); + OutStreamer->EmitIntValue(0, WordSize); + if (GV->hasInitializer() && !AllZeroInitValue) { + OutStreamer->EmitSymbolValue( + getOrCreateEmuTLSInitSym(GVSym, OutContext), WordSize); + } else + OutStreamer->EmitIntValue(0, WordSize); + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedSym), + MCConstantExpr::create(4 * WordSize, OutContext)); + OutStreamer->AddBlankLine(); // End of the __emutls_v.* variable. +} + /// EmitGlobalVariable - Emit the specified global variable to the .s file. void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + bool IsEmuTLSVar = + GV->getThreadLocalMode() != llvm::GlobalVariable::NotThreadLocal && + TM.Options.EmulatedTLS; + assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) && + "No emulated TLS variables in the common section"); + if (GV->hasInitializer()) { // Check to see if this is a special global used by LLVM, if so, emit it. 
if (EmitSpecialLLVMGlobal(GV)) @@ -352,7 +398,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GlobalGOTEquivs.count(getSymbol(GV))) return; - if (isVerbose()) { + if (isVerbose() && !IsEmuTLSVar) { + // When printing the control variable __emutls_v.*, + // we don't need to print the original TLS variable name. GV->printAsOperand(OutStreamer->GetCommentOS(), /*PrintType=*/false, GV->getParent()); OutStreamer->GetCommentOS() << '\n'; @@ -360,7 +408,12 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { } MCSymbol *GVSym = getSymbol(GV); - EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration()); + MCSymbol *EmittedSym = IsEmuTLSVar ? + getOrCreateEmuTLSControlSym(GVSym, OutContext) : GVSym; + // getOrCreateEmuTLSControlSym only creates the symbol with name and default attributes. + // GV's or GVSym's attributes will be used for the EmittedSym. + + EmitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration()); if (!GV->hasInitializer()) // External globals require no extra code. return; @@ -371,17 +424,29 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { "' is already defined"); if (MAI->hasDotTypeDotSizeDirective()) - OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); + OutStreamer->EmitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject); SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); - const DataLayout *DL = TM.getDataLayout(); - uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType()); + const DataLayout &DL = GV->getParent()->getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); // If the alignment is specified, we *must* obey it. Overaligning a global // with a specified alignment is a prompt way to break globals emitted to // sections and expected to be contiguous (e.g. ObjC metadata). - unsigned AlignLog = getGVAlignmentLog2(GV, *DL); + unsigned AlignLog = getGVAlignmentLog2(GV, DL); + + bool AllZeroInitValue = false; + const Constant *InitValue = GV->getInitializer(); + if (isa<ConstantAggregateZero>(InitValue)) + AllZeroInitValue = true; + else { + const ConstantInt *InitIntValue = dyn_cast<ConstantInt>(InitValue); + if (InitIntValue && InitIntValue->isZero()) + AllZeroInitValue = true; + } + if (IsEmuTLSVar) + EmitEmulatedTLSControlVariable(GV, EmittedSym, AllZeroInitValue); for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); @@ -390,6 +455,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Handle common and BSS local symbols (.lcomm). if (GVKind.isCommon() || GVKind.isBSSLocal()) { + assert(!(IsEmuTLSVar && GVKind.isCommon()) && + "No emulated TLS variables in the common section"); if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. unsigned Align = 1 << AlignLog; @@ -434,12 +501,21 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { return; } - MCSection *TheSection = + if (IsEmuTLSVar && AllZeroInitValue) + return; // No need of initialization values. + + MCSymbol *EmittedInitSym = IsEmuTLSVar ? + getOrCreateEmuTLSInitSym(GVSym, OutContext) : GVSym; + // getOrCreateEmuTLSInitSym only creates the symbol with name and default attributes. + // GV's or GVSym's attributes will be used for the EmittedInitSym. + + MCSection *TheSection = IsEmuTLSVar ? 
+ getObjFileLowering().getReadOnlySection() : getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); // Handle the zerofill directive on darwin, which is a special form of BSS // emission. - if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) { + if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective() && !IsEmuTLSVar) { if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. // .globl _foo @@ -459,7 +535,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // TLOF class. This will also make it more obvious that stuff like // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho // specific code. - if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { + if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective() && !IsEmuTLSVar) { // Emit the .tbss symbol MCSymbol *MangSym = OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); @@ -473,7 +549,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { EmitAlignment(AlignLog, GV); OutStreamer->EmitLabel(MangSym); - EmitGlobalConstant(GV->getInitializer()); + EmitGlobalConstant(GV->getParent()->getDataLayout(), + GV->getInitializer()); } OutStreamer->AddBlankLine(); @@ -490,7 +567,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // - __tlv_bootstrap - used to make sure support exists // - spare pointer, used when mapped by the runtime // - pointer to mangled symbol above with initializer - unsigned PtrSize = DL->getPointerTypeSize(GV->getType()); + unsigned PtrSize = DL.getPointerTypeSize(GV->getType()); OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), PtrSize); OutStreamer->EmitIntValue(0, PtrSize); @@ -502,16 +579,18 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer->SwitchSection(TheSection); - EmitLinkage(GV, GVSym); + // emutls_t.* symbols are only used in the current compilation unit. + if (!IsEmuTLSVar) + EmitLinkage(GV, EmittedInitSym); EmitAlignment(AlignLog, GV); - OutStreamer->EmitLabel(GVSym); + OutStreamer->EmitLabel(EmittedInitSym); - EmitGlobalConstant(GV->getInitializer()); + EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer()); if (MAI->hasDotTypeDotSizeDirective()) // .size foo, 42 - OutStreamer->emitELFSize(cast<MCSymbolELF>(GVSym), + OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedInitSym), MCConstantExpr::create(Size, OutContext)); OutStreamer->AddBlankLine(); @@ -545,7 +624,7 @@ void AsmPrinter::EmitFunctionHeader() { // Emit the prefix data. if (F->hasPrefixData()) - EmitGlobalConstant(F->getPrefixData()); + EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData()); // Emit the CurrentFnSym. This is a virtual function to allow targets to // do their wild and crazy things as required. @@ -580,7 +659,7 @@ void AsmPrinter::EmitFunctionHeader() { // Emit the prologue data. if (F->hasPrologueData()) - EmitGlobalConstant(F->getPrologueData()); + EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrologueData()); } /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the @@ -640,19 +719,27 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { /// that is an implicit def. 
void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const { unsigned RegNo = MI->getOperand(0).getReg(); - OutStreamer->AddComment(Twine("implicit-def: ") + - MMI->getContext().getRegisterInfo()->getName(RegNo)); + + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << "implicit-def: " + << PrintReg(RegNo, MF->getSubtarget().getRegisterInfo()); + + OutStreamer->AddComment(OS.str()); OutStreamer->AddBlankLine(); } static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { - std::string Str = "kill:"; + std::string Str; + raw_string_ostream OS(Str); + OS << "kill:"; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &Op = MI->getOperand(i); assert(Op.isReg() && "KILL instruction must have only register operands"); - Str += ' '; - Str += AP.MMI->getContext().getRegisterInfo()->getName(Op.getReg()); - Str += (Op.isDef() ? "<def>" : "<kill>"); + OS << ' ' + << PrintReg(Op.getReg(), + AP.MF->getSubtarget().getRegisterInfo()) + << (Op.isDef() ? "<def>" : "<kill>"); } AP.OutStreamer->AddComment(Str); AP.OutStreamer->AddBlankLine(); @@ -688,6 +775,31 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm(); int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0; + for (unsigned i = 0; i < Expr->getNumElements(); ++i) { + if (Deref) { + // We currently don't support extra Offsets or derefs after the first + // one. Bail out early instead of emitting an incorrect comment + OS << " [complex expression]"; + AP.OutStreamer->emitRawComment(OS.str()); + return true; + } + uint64_t Op = Expr->getElement(i); + if (Op == dwarf::DW_OP_deref) { + Deref = true; + continue; + } else if (Op == dwarf::DW_OP_bit_piece) { + // There can't be any operands after this in a valid expression + break; + } + uint64_t ExtraOffset = Expr->getElement(i++); + if (Op == dwarf::DW_OP_plus) + Offset += ExtraOffset; + else { + assert(Op == dwarf::DW_OP_minus); + Offset -= ExtraOffset; + } + } + // Register or immediate value. Register 0 means undef. if (MI->getOperand(0).isFPImm()) { APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); @@ -727,7 +839,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } if (Deref) OS << '['; - OS << AP.MMI->getContext().getRegisterInfo()->getName(Reg); + OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo()); } if (Deref) @@ -888,7 +1000,7 @@ void AsmPrinter::EmitFunctionBody() { EmitFunctionBodyEnd(); if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() || - MAI->hasDotTypeDotSizeDirective()) { + MMI->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) { // Create a symbol for the end of function. CurrentFnEnd = createTempSymbol("func_end"); OutStreamer->EmitLabel(CurrentFnEnd); @@ -1047,20 +1159,17 @@ bool AsmPrinter::doFinalization(Module &M) { // Output stubs for external and common global variables. MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); if (!Stubs.empty()) { - OutStreamer->SwitchSection(TLOF.getDataRelSection()); - const DataLayout *DL = TM.getDataLayout(); + OutStreamer->SwitchSection(TLOF.getDataSection()); + const DataLayout &DL = M.getDataLayout(); for (const auto &Stub : Stubs) { OutStreamer->EmitLabel(Stub.first); OutStreamer->EmitSymbolValue(Stub.second.getPointer(), - DL->getPointerSize()); + DL.getPointerSize()); } } } - // Make sure we wrote out everything we need. - OutStreamer->Flush(); - // Finalize debug and EH information. 
for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, @@ -1103,10 +1212,29 @@ bool AsmPrinter::doFinalization(Module &M) { else assert(Alias.hasLocalLinkage() && "Invalid alias linkage"); + // Set the symbol type to function if the alias has a function type. + // This affects codegen when the aliasee is not a function. + if (Alias.getType()->getPointerElementType()->isFunctionTy()) + OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction); + EmitVisibility(Name, Alias.getVisibility()); // Emit the directives as assignments aka .set: OutStreamer->EmitAssignment(Name, lowerConstant(Alias.getAliasee())); + + // If the aliasee does not correspond to a symbol in the output, i.e. the + // alias is not of an object or the aliased object is private, then set the + // size of the alias symbol from the type of the alias. We don't do this in + // other situations as the alias and aliasee having differing types but same + // size may be intentional. + const GlobalObject *BaseObject = Alias.getBaseObject(); + if (MAI->hasDotTypeDotSizeDirective() && Alias.getValueType()->isSized() && + (!BaseObject || BaseObject->hasPrivateLinkage())) { + const DataLayout &DL = M.getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(Alias.getValueType()); + OutStreamer->emitELFSize(cast<MCSymbolELF>(Name), + MCConstantExpr::create(Size, OutContext)); + } } GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); @@ -1120,16 +1248,16 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit __morestack address if needed for indirect calls. if (MMI->usesMorestackAddr()) { - MCSection *ReadOnlySection = - getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(), - /*C=*/nullptr); + MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant( + getDataLayout(), SectionKind::getReadOnly(), + /*C=*/nullptr); OutStreamer->SwitchSection(ReadOnlySection); MCSymbol *AddrSymbol = OutContext.getOrCreateSymbol(StringRef("__morestack_addr")); OutStreamer->EmitLabel(AddrSymbol); - unsigned PtrSize = TM.getDataLayout()->getPointerSize(0); + unsigned PtrSize = M.getDataLayout().getPointerSize(0); OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"), PtrSize); } @@ -1169,7 +1297,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { CurExceptionSym = nullptr; bool NeedsLocalForSize = MAI->needsLocalForSize(); if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() || - NeedsLocalForSize) { + MMI->hasEHFunclets() || NeedsLocalForSize) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) CurrentFnSymForSize = CurrentFnBegin; @@ -1206,14 +1334,14 @@ void AsmPrinter::EmitConstantPool() { const MachineConstantPoolEntry &CPE = CP[i]; unsigned Align = CPE.getAlignment(); - SectionKind Kind = - CPE.getSectionKind(TM.getDataLayout()); + SectionKind Kind = CPE.getSectionKind(&getDataLayout()); const Constant *C = nullptr; if (!CPE.isMachineConstantPoolEntry()) C = CPE.Val.ConstVal; - MCSection *S = getObjFileLowering().getSectionForConstant(Kind, C); + MCSection *S = + getObjFileLowering().getSectionForConstant(getDataLayout(), Kind, C); // The number of sections are small, just do a linear search from the // last section to the first. 
@@ -1260,14 +1388,13 @@ void AsmPrinter::EmitConstantPool() { OutStreamer->EmitZeros(NewOffset - Offset); Type *Ty = CPE.getType(); - Offset = NewOffset + - TM.getDataLayout()->getTypeAllocSize(Ty); + Offset = NewOffset + getDataLayout().getTypeAllocSize(Ty); OutStreamer->EmitLabel(Sym); if (CPE.isMachineConstantPoolEntry()) EmitMachineConstantPoolValue(CPE.Val.MachineCPVal); else - EmitGlobalConstant(CPE.Val.ConstVal); + EmitGlobalConstant(getDataLayout(), CPE.Val.ConstVal); } } } @@ -1276,7 +1403,7 @@ void AsmPrinter::EmitConstantPool() { /// by the current function to the current output stream. /// void AsmPrinter::EmitJumpTableInfo() { - const DataLayout *DL = MF->getTarget().getDataLayout(); + const DataLayout &DL = MF->getDataLayout(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (!MJTI) return; if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return; @@ -1296,8 +1423,7 @@ void AsmPrinter::EmitJumpTableInfo() { OutStreamer->SwitchSection(ReadOnlySection); } - EmitAlignment(Log2_32( - MJTI->getEntryAlignment(*TM.getDataLayout()))); + EmitAlignment(Log2_32(MJTI->getEntryAlignment(DL))); // Jump tables in code sections are marked with a data_region directive // where that's supported. @@ -1335,7 +1461,7 @@ void AsmPrinter::EmitJumpTableInfo() { // before each jump table. The first label is never referenced, but tells // the assembler and linker the extents of the jump table object. The // second label is actually referenced by the code. - if (JTInDiffSection && DL->hasLinkerPrivateGlobalPrefix()) + if (JTInDiffSection && DL.hasLinkerPrivateGlobalPrefix()) // FIXME: This doesn't have to have any specific name, just any randomly // named and numbered 'l' label would work. Simplify GetJTISymbol. OutStreamer->EmitLabel(GetJTISymbol(JTI, true)); @@ -1409,8 +1535,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, assert(Value && "Unknown entry kind!"); - unsigned EntrySize = - MJTI->getEntrySize(*TM.getDataLayout()); + unsigned EntrySize = MJTI->getEntrySize(getDataLayout()); OutStreamer->EmitValue(Value, EntrySize); } @@ -1435,7 +1560,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { assert(GV->hasInitializer() && "Not a special LLVM global!"); if (GV->getName() == "llvm.global_ctors") { - EmitXXStructorList(GV->getInitializer(), /* isCtor */ true); + EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), + /* isCtor */ true); if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { @@ -1447,7 +1573,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { } if (GV->getName() == "llvm.global_dtors") { - EmitXXStructorList(GV->getInitializer(), /* isCtor */ false); + EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), + /* isCtor */ false); if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { @@ -1485,7 +1612,8 @@ struct Structor { /// EmitXXStructorList - Emit the ctor or dtor list taking into account the init /// priority. -void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { +void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List, + bool isCtor) { // Should be an array of '{ int, void ()* }' structs. The first value is the // init priority. 
if (!isa<ConstantArray>(List)) return; @@ -1520,8 +1648,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { } // Emit the function pointers in the target-specific order - const DataLayout *DL = TM.getDataLayout(); - unsigned Align = Log2_32(DL->getPointerPrefAlignment()); + unsigned Align = Log2_32(DL.getPointerPrefAlignment()); std::stable_sort(Structors.begin(), Structors.end(), [](const Structor &L, const Structor &R) { return L.Priority < R.Priority; }); @@ -1542,7 +1669,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { OutStreamer->SwitchSection(OutputSection); if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection()) EmitAlignment(Align); - EmitXXStructor(S.Func); + EmitXXStructor(DL, S.Func); } } @@ -1621,8 +1748,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const { if (GV) - NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), - NumBits); + NumBits = getGVAlignmentLog2(GV, GV->getParent()->getDataLayout(), NumBits); if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. @@ -1668,7 +1794,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. - if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout())) + if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout())) if (C != CE) return lowerConstant(C); @@ -1682,11 +1808,9 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { - const DataLayout &DL = *TM.getDataLayout(); - // Generate a symbolic expression for the byte address - APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); - cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI); + APInt OffsetAI(getDataLayout().getPointerTypeSizeInBits(CE->getType()), 0); + cast<GEPOperator>(CE)->accumulateConstantOffset(getDataLayout(), OffsetAI); const MCExpr *Base = lowerConstant(CE->getOperand(0)); if (!OffsetAI) @@ -1707,7 +1831,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { return lowerConstant(CE->getOperand(0)); case Instruction::IntToPtr: { - const DataLayout &DL = *TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. @@ -1718,7 +1842,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { } case Instruction::PtrToInt: { - const DataLayout &DL = *TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. @@ -1769,10 +1893,13 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { } } -static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP, +static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C, + AsmPrinter &AP, const Constant *BaseCV = nullptr, uint64_t Offset = 0); +static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP); + /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the /// byte value. 
If it is not a repeated sequence, return -1. @@ -1789,9 +1916,9 @@ static int isRepeatedByteSequence(const ConstantDataSequential *V) { /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the /// byte value. If it is not a repeated sequence, return -1. -static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { +static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - uint64_t Size = TM.getDataLayout()->getTypeAllocSizeInBits(V->getType()); + uint64_t Size = DL.getTypeAllocSizeInBits(V->getType()); assert(Size % 8 == 0); // Extend the element to take zero padding into account. @@ -1806,7 +1933,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { // byte. assert(CA->getNumOperands() != 0 && "Should be a CAZ"); Constant *Op0 = CA->getOperand(0); - int Byte = isRepeatedByteSequence(Op0, TM); + int Byte = isRepeatedByteSequence(Op0, DL); if (Byte == -1) return -1; @@ -1823,15 +1950,14 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { return -1; } -static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, - AsmPrinter &AP){ +static void emitGlobalConstantDataSequential(const DataLayout &DL, + const ConstantDataSequential *CDS, + AsmPrinter &AP) { // See if we can aggregate this into a .fill, if so, emit it as such. - int Value = isRepeatedByteSequence(CDS, AP.TM); + int Value = isRepeatedByteSequence(CDS, DL); if (Value != -1) { - uint64_t Bytes = - AP.TM.getDataLayout()->getTypeAllocSize( - CDS->getType()); + uint64_t Bytes = DL.getTypeAllocSize(CDS->getType()); // Don't emit a 1-byte object as a .fill. if (Bytes > 1) return AP.OutStreamer->EmitFill(Bytes, Value); @@ -1851,37 +1977,11 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, AP.OutStreamer->EmitIntValue(CDS->getElementAsInteger(i), ElementByteSize); } - } else if (ElementByteSize == 4) { - // FP Constants are printed as integer constants to avoid losing - // precision. - assert(CDS->getElementType()->isFloatTy()); - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - union { - float F; - uint32_t I; - }; - - F = CDS->getElementAsFloat(i); - if (AP.isVerbose()) - AP.OutStreamer->GetCommentOS() << "float " << F << '\n'; - AP.OutStreamer->EmitIntValue(I, 4); - } } else { - assert(CDS->getElementType()->isDoubleTy()); - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - union { - double F; - uint64_t I; - }; - - F = CDS->getElementAsDouble(i); - if (AP.isVerbose()) - AP.OutStreamer->GetCommentOS() << "double " << F << '\n'; - AP.OutStreamer->EmitIntValue(I, 8); - } + for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) + emitGlobalConstantFP(cast<ConstantFP>(CDS->getElementAsConstant(I)), AP); } - const DataLayout &DL = *AP.TM.getDataLayout(); unsigned Size = DL.getTypeAllocSize(CDS->getType()); unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) * CDS->getNumElements(); @@ -1890,12 +1990,12 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, } -static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP, +static void emitGlobalConstantArray(const DataLayout &DL, + const ConstantArray *CA, AsmPrinter &AP, const Constant *BaseCV, uint64_t Offset) { // See if we can aggregate some values. Make sure it can be // represented as a series of bytes of the constant value. 
- int Value = isRepeatedByteSequence(CA, AP.TM); - const DataLayout &DL = *AP.TM.getDataLayout(); + int Value = isRepeatedByteSequence(CA, DL); if (Value != -1) { uint64_t Bytes = DL.getTypeAllocSize(CA->getType()); @@ -1903,17 +2003,17 @@ static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP, } else { for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { - emitGlobalConstantImpl(CA->getOperand(i), AP, BaseCV, Offset); + emitGlobalConstantImpl(DL, CA->getOperand(i), AP, BaseCV, Offset); Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType()); } } } -static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { +static void emitGlobalConstantVector(const DataLayout &DL, + const ConstantVector *CV, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - emitGlobalConstantImpl(CV->getOperand(i), AP); + emitGlobalConstantImpl(DL, CV->getOperand(i), AP); - const DataLayout &DL = *AP.TM.getDataLayout(); unsigned Size = DL.getTypeAllocSize(CV->getType()); unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) * CV->getType()->getNumElements(); @@ -1921,21 +2021,21 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { AP.OutStreamer->EmitZeros(Padding); } -static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP, +static void emitGlobalConstantStruct(const DataLayout &DL, + const ConstantStruct *CS, AsmPrinter &AP, const Constant *BaseCV, uint64_t Offset) { // Print the fields in successive locations. Pad to align if needed! - const DataLayout *DL = AP.TM.getDataLayout(); - unsigned Size = DL->getTypeAllocSize(CS->getType()); - const StructLayout *Layout = DL->getStructLayout(CS->getType()); + unsigned Size = DL.getTypeAllocSize(CS->getType()); + const StructLayout *Layout = DL.getStructLayout(CS->getType()); uint64_t SizeSoFar = 0; for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { const Constant *Field = CS->getOperand(i); // Print the actual field value. - emitGlobalConstantImpl(Field, AP, BaseCV, Offset+SizeSoFar); + emitGlobalConstantImpl(DL, Field, AP, BaseCV, Offset + SizeSoFar); // Check if padding is needed and insert one or more 0s. - uint64_t FieldSize = DL->getTypeAllocSize(Field->getType()); + uint64_t FieldSize = DL.getTypeAllocSize(Field->getType()); uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1)) - Layout->getElementOffset(i)) - FieldSize; SizeSoFar += FieldSize + PadSize; @@ -1974,8 +2074,7 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { // PPC's long double has odd notions of endianness compared to how LLVM // handles it: p[0] goes first for *big* endian on PPC. - if (AP.TM.getDataLayout()->isBigEndian() && - !CFP->getType()->isPPC_FP128Ty()) { + if (AP.getDataLayout().isBigEndian() && !CFP->getType()->isPPC_FP128Ty()) { int Chunk = API.getNumWords() - 1; if (TrailingBytes) @@ -1993,13 +2092,13 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { } // Emit the tail padding for the long double. 
- const DataLayout &DL = *AP.TM.getDataLayout(); + const DataLayout &DL = AP.getDataLayout(); AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(CFP->getType()) - DL.getTypeStoreSize(CFP->getType())); } static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { - const DataLayout *DL = AP.TM.getDataLayout(); + const DataLayout &DL = AP.getDataLayout(); unsigned BitWidth = CI->getBitWidth(); // Copy the value as we may massage the layout for constants whose bit width @@ -2016,7 +2115,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // Big endian: // * Record the extra bits to emit. // * Realign the raw data to emit the chunks of 64-bits. - if (DL->isBigEndian()) { + if (DL.isBigEndian()) { // Basically the structure of the raw data is a chunk of 64-bits cells: // 0 1 BitWidth / 64 // [chunk1][chunk2] ... [chunkN]. @@ -2037,7 +2136,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // quantities at a time. const uint64_t *RawData = Realigned.getRawData(); for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { - uint64_t Val = DL->isBigEndian() ? RawData[e - i - 1] : RawData[i]; + uint64_t Val = DL.isBigEndian() ? RawData[e - i - 1] : RawData[i]; AP.OutStreamer->EmitIntValue(Val, 8); } @@ -2045,8 +2144,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // Emit the extra bits after the 64-bits chunks. // Emit a directive that fills the expected size. - uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize( - CI->getType()); + uint64_t Size = AP.getDataLayout().getTypeAllocSize(CI->getType()); Size -= (BitWidth / 64) * 8; assert(Size && Size * 8 >= ExtraBitsSize && (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize))) @@ -2094,7 +2192,7 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, if (!AP.GlobalGOTEquivs.count(GOTEquivSym)) return; - const GlobalValue *BaseGV = dyn_cast<GlobalValue>(BaseCst); + const GlobalValue *BaseGV = dyn_cast_or_null<GlobalValue>(BaseCst); if (!BaseGV) return; @@ -2149,10 +2247,10 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses); } -static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP, - const Constant *BaseCV, uint64_t Offset) { - const DataLayout *DL = AP.TM.getDataLayout(); - uint64_t Size = DL->getTypeAllocSize(CV->getType()); +static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, + AsmPrinter &AP, const Constant *BaseCV, + uint64_t Offset) { + uint64_t Size = DL.getTypeAllocSize(CV->getType()); // Globals with sub-elements such as combinations of arrays and structs // are handled recursively by emitGlobalConstantImpl. 
Keep track of the @@ -2189,32 +2287,32 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP, } if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV)) - return emitGlobalConstantDataSequential(CDS, AP); + return emitGlobalConstantDataSequential(DL, CDS, AP); if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) - return emitGlobalConstantArray(CVA, AP, BaseCV, Offset); + return emitGlobalConstantArray(DL, CVA, AP, BaseCV, Offset); if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) - return emitGlobalConstantStruct(CVS, AP, BaseCV, Offset); + return emitGlobalConstantStruct(DL, CVS, AP, BaseCV, Offset); if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of // vectors). if (CE->getOpcode() == Instruction::BitCast) - return emitGlobalConstantImpl(CE->getOperand(0), AP); + return emitGlobalConstantImpl(DL, CE->getOperand(0), AP); if (Size > 8) { // If the constant expression's size is greater than 64-bits, then we have // to emit the value in chunks. Try to constant fold the value and emit it // that way. - Constant *New = ConstantFoldConstantExpression(CE, *DL); + Constant *New = ConstantFoldConstantExpression(CE, DL); if (New && New != CE) - return emitGlobalConstantImpl(New, AP); + return emitGlobalConstantImpl(DL, New, AP); } } if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) - return emitGlobalConstantVector(V, AP); + return emitGlobalConstantVector(DL, V, AP); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. @@ -2230,11 +2328,10 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP, } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. -void AsmPrinter::EmitGlobalConstant(const Constant *CV) { - uint64_t Size = - TM.getDataLayout()->getTypeAllocSize(CV->getType()); +void AsmPrinter::EmitGlobalConstant(const DataLayout &DL, const Constant *CV) { + uint64_t Size = DL.getTypeAllocSize(CV->getType()); if (Size) - emitGlobalConstantImpl(CV, *this); + emitGlobalConstantImpl(DL, CV, *this); else if (MAI->hasSubsectionsViaSymbols()) { // If the global has zero size, emit a single byte so that two labels don't // look like they are at the same location. @@ -2272,10 +2369,10 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { /// GetCPISymbol - Return the symbol for the specified constant pool entry. MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { - const DataLayout *DL = TM.getDataLayout(); - return OutContext.getOrCreateSymbol - (Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) - + "_" + Twine(CPID)); + const DataLayout &DL = getDataLayout(); + return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + "CPI" + Twine(getFunctionNumber()) + "_" + + Twine(CPID)); } /// GetJTISymbol - Return the symbol for the specified jump table entry. @@ -2286,10 +2383,10 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { /// GetJTSetSymbol - Return the symbol for the specified jump table .set /// FIXME: privatize to AsmPrinter. 
MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { - const DataLayout *DL = TM.getDataLayout(); - return OutContext.getOrCreateSymbol - (Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" + - Twine(UID) + "_set_" + Twine(MBBID)); + const DataLayout &DL = getDataLayout(); + return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + Twine(getFunctionNumber()) + "_" + + Twine(UID) + "_set_" + Twine(MBBID)); } MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV, @@ -2301,7 +2398,7 @@ MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV, /// Return the MCSymbol for the specified ExternalSymbol. MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const { SmallString<60> NameStr; - Mangler::getNameWithPrefix(NameStr, Sym, *TM.getDataLayout()); + Mangler::getNameWithPrefix(NameStr, Sym, getDataLayout()); return OutContext.getOrCreateSymbol(NameStr); } @@ -2376,6 +2473,14 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, /// MachineBasicBlock, an alignment (if present) and a comment describing /// it if appropriate. void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { + // End the previous funclet and start a new one. + if (MBB.isEHFuncletEntry()) { + for (const HandlerInfo &HI : Handlers) { + HI.Handler->endFunclet(); + HI.Handler->beginFunclet(MBB); + } + } + // Emit an alignment directive for this block, if needed. if (unsigned Align = MBB.getAlignment()) EmitAlignment(Align); @@ -2389,20 +2494,28 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { if (isVerbose()) OutStreamer->AddComment("Block address taken"); - for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB)) - OutStreamer->EmitLabel(Sym); + // MBBs can have their address taken as part of CodeGen without having + // their corresponding BB's address taken in IR + if (BB->hasAddressTaken()) + for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB)) + OutStreamer->EmitLabel(Sym); } // Print some verbose block comments. if (isVerbose()) { - if (const BasicBlock *BB = MBB.getBasicBlock()) - if (BB->hasName()) - OutStreamer->AddComment("%" + BB->getName()); + if (const BasicBlock *BB = MBB.getBasicBlock()) { + if (BB->hasName()) { + BB->printAsOperand(OutStreamer->GetCommentOS(), + /*PrintType=*/false, BB->getModule()); + OutStreamer->GetCommentOS() << '\n'; + } + } emitBasicBlockLoopComments(MBB, LI, *this); } // Print the main label for the block. - if (MBB.pred_empty() || isBlockOnlyReachableByFallthrough(&MBB)) { + if (MBB.pred_empty() || + (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry())) { if (isVerbose()) { // NOTE: Want this comment at start of line, don't emit with AddComment. OutStreamer->emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false); @@ -2440,7 +2553,7 @@ bool AsmPrinter:: isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { // If this is a landing pad, it isn't a fall through. If it has no preds, // then nothing falls through to it. - if (MBB->isLandingPad() || MBB->pred_empty()) + if (MBB->isEHPad() || MBB->pred_empty()) return false; // If there isn't exactly one predecessor, it can't be a fall through. 
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index ad180b6667c0..504c5d283cba 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -47,7 +47,7 @@ void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const { OutStreamer->EmitSLEB128IntValue(Value); } -/// EmitULEB128 - emit the specified signed leb128 value. +/// EmitULEB128 - emit the specified unsigned leb128 value. void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, unsigned PadTo) const { if (isVerbose() && Desc) @@ -56,18 +56,6 @@ void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, OutStreamer->EmitULEB128IntValue(Value, PadTo); } -/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value. -void AsmPrinter::EmitCFAByte(unsigned Val) const { - if (isVerbose()) { - if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset + 64) - OutStreamer->AddComment("DW_CFA_offset + Reg (" + - Twine(Val - dwarf::DW_CFA_offset) + ")"); - else - OutStreamer->AddComment(dwarf::CallFrameString(Val)); - } - OutStreamer->EmitIntValue(Val, 1); -} - static const char *DecodeDWARFEncoding(unsigned Encoding) { switch (Encoding) { case dwarf::DW_EH_PE_absptr: @@ -134,7 +122,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { default: llvm_unreachable("Invalid encoded value."); case dwarf::DW_EH_PE_absptr: - return TM.getDataLayout()->getPointerSize(); + return MF->getDataLayout().getPointerSize(); case dwarf::DW_EH_PE_udata2: return 2; case dwarf::DW_EH_PE_udata4: @@ -228,6 +216,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpDefCfaOffset: OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset()); break; + case MCCFIInstruction::OpAdjustCfaOffset: + OutStreamer->EmitCFIAdjustCfaOffset(Inst.getOffset()); + break; case MCCFIInstruction::OpDefCfa: OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); break; @@ -246,6 +237,12 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpSameValue: OutStreamer->EmitCFISameValue(Inst.getRegister()); break; + case MCCFIInstruction::OpGnuArgsSize: + OutStreamer->EmitCFIGnuArgsSize(Inst.getOffset()); + break; + case MCCFIInstruction::OpEscape: + OutStreamer->EmitCFIEscape(Inst.getValues()); + break; } } @@ -284,17 +281,10 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const { } } -void -AsmPrinter::emitDwarfAbbrevs(const std::vector<DIEAbbrev *>& Abbrevs) const { - // For each abbrevation. - for (const DIEAbbrev *Abbrev : Abbrevs) { - // Emit the abbrevations code (base 1 index.) - EmitULEB128(Abbrev->getNumber(), "Abbreviation Code"); - - // Emit the abbreviations data. - Abbrev->Emit(this); - } +void AsmPrinter::emitDwarfAbbrev(const DIEAbbrev &Abbrev) const { + // Emit the abbreviations code (base 1 index.) + EmitULEB128(Abbrev.getNumber(), "Abbreviation Code"); - // Mark end of abbreviations. - EmitULEB128(0, "EOM(3)"); + // Emit the abbreviations data. 
+ Abbrev.Emit(this); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h index f1efe9d835e0..e59961f85769 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h @@ -19,6 +19,7 @@ namespace llvm { +class MachineBasicBlock; class MachineFunction; class MachineInstr; class MCSymbol; @@ -50,6 +51,11 @@ public: /// beginFunction at all. virtual void endFunction(const MachineFunction *MF) = 0; + /// \brief Emit target-specific EH funclet machinery. + virtual void beginFunclet(const MachineBasicBlock &MBB, + MCSymbol *Sym = nullptr) {} + virtual void endFunclet() {} + /// \brief Process beginning of an instruction. virtual void beginInstruction(const MachineInstr *MI) = 0; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 793e62960dd6..4171657b5285 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -127,19 +127,13 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, std::unique_ptr<MCAsmParser> Parser( createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI)); - // Create a temporary copy of the original STI because the parser may modify - // it. For example, when switching between arm and thumb mode. If the target - // needs to emit code to return to the original state it can do so in - // emitInlineAsmEnd(). - MCSubtargetInfo TmpSTI = STI; - // We create a new MCInstrInfo here since we might be at the module level // and not have a MachineFunction to initialize the TargetInstrInfo from and // we only need MCInstrInfo for asm parsing. We create one unconditionally // because it's not subtarget dependent. std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo()); std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser( - TmpSTI, *Parser, *MII, MCOptions)); + STI, *Parser, *MII, MCOptions)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); @@ -154,7 +148,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, // Don't implicitly switch to the text section before the asm. int Res = Parser->Run(/*NoInitialTextSection*/ true, /*NoFinalize*/ true); - emitInlineAsmEnd(STI, &TmpSTI); + emitInlineAsmEnd(STI, &TAP->getSTI()); if (Res && !HasDiagHandler) report_fatal_error("Error parsing inline asm\n"); } @@ -512,9 +506,9 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { /// for their own strange codes. 
void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, const char *Code) const { - const DataLayout *DL = TM.getDataLayout(); if (!strcmp(Code, "private")) { - OS << DL->getPrivateGlobalPrefix(); + const DataLayout &DL = MF->getDataLayout(); + OS << DL.getPrivateGlobalPrefix(); } else if (!strcmp(Code, "comment")) { OS << MAI->getCommentString(); } else if (!strcmp(Code, "uid")) { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h index 0cc829fffc54..df1997bcb72c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -24,16 +24,19 @@ namespace llvm { class ByteStreamer { - public: - virtual ~ByteStreamer() {} + protected: + ~ByteStreamer() = default; + ByteStreamer(const ByteStreamer&) = default; + ByteStreamer() = default; + public: // For now we're just handling the calls we need for dwarf emission/hashing. virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0; virtual void EmitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0; virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "") = 0; }; -class APByteStreamer : public ByteStreamer { +class APByteStreamer final : public ByteStreamer { private: AsmPrinter &AP; @@ -53,7 +56,7 @@ public: } }; -class HashingByteStreamer : public ByteStreamer { +class HashingByteStreamer final : public ByteStreamer { private: DIEHash &Hash; public: @@ -69,7 +72,7 @@ class HashingByteStreamer : public ByteStreamer { } }; -class BufferByteStreamer : public ByteStreamer { +class BufferByteStreamer final : public ByteStreamer { private: SmallVectorImpl<char> &Buffer; SmallVectorImpl<std::string> &Comments; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 46dbc7693698..bf794f7f70f6 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -86,7 +86,7 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const { AP->EmitULEB128(0, "EOM(2)"); } -#ifndef NDEBUG +LLVM_DUMP_METHOD void DIEAbbrev::print(raw_ostream &O) { O << "Abbreviation @" << format("0x%lx", (long)(intptr_t)this) @@ -104,12 +104,13 @@ void DIEAbbrev::print(raw_ostream &O) { << '\n'; } } + +LLVM_DUMP_METHOD void DIEAbbrev::dump() { print(dbgs()); } -#endif DIEAbbrev DIE::generateAbbrev() const { DIEAbbrev Abbrev(Tag, hasChildren()); - for (const DIEValue &V : Values) + for (const DIEValue &V : values()) Abbrev.AddAttribute(V.getAttribute(), V.getForm()); return Abbrev; } @@ -144,36 +145,35 @@ DIEValue DIE::findAttribute(dwarf::Attribute Attribute) const { return DIEValue(); } -#ifndef NDEBUG -void DIE::print(raw_ostream &O, unsigned IndentCount) const { - const std::string Indent(IndentCount, ' '); - bool isBlock = getTag() == 0; - - if (!isBlock) { - O << Indent - << "Die: " - << format("0x%lx", (long)(intptr_t)this) - << ", Offset: " << Offset - << ", Size: " << Size << "\n"; - - O << Indent - << dwarf::TagString(getTag()) - << " " - << dwarf::ChildrenString(hasChildren()) << "\n"; - } else { - O << "Size: " << Size << "\n"; - } +LLVM_DUMP_METHOD +static void printValues(raw_ostream &O, const DIEValueList &Values, + StringRef Type, unsigned Size, unsigned IndentCount) { + O << Type << ": Size: " << Size << "\n"; - IndentCount += 2; unsigned I = 0; - for (const auto &V : Values) { + const std::string Indent(IndentCount, ' '); + for (const auto &V : Values.values()) { O << Indent; + O << "Blk[" << I++ << "]"; + O << " " 
<< dwarf::FormEncodingString(V.getForm()) << " "; + V.print(O); + O << "\n"; + } +} - if (!isBlock) - O << dwarf::AttributeString(V.getAttribute()); - else - O << "Blk[" << I++ << "]"; +LLVM_DUMP_METHOD +void DIE::print(raw_ostream &O, unsigned IndentCount) const { + const std::string Indent(IndentCount, ' '); + O << Indent << "Die: " << format("0x%lx", (long)(intptr_t) this) + << ", Offset: " << Offset << ", Size: " << Size << "\n"; + O << Indent << dwarf::TagString(getTag()) << " " + << dwarf::ChildrenString(hasChildren()) << "\n"; + + IndentCount += 2; + for (const auto &V : values()) { + O << Indent; + O << dwarf::AttributeString(V.getAttribute()); O << " " << dwarf::FormEncodingString(V.getForm()) << " "; V.print(O); O << "\n"; @@ -183,13 +183,13 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const { for (const auto &Child : children()) Child.print(O, IndentCount + 4); - if (!isBlock) O << "\n"; + O << "\n"; } +LLVM_DUMP_METHOD void DIE::dump() { print(dbgs()); } -#endif void DIEValue::EmitValue(const AsmPrinter *AP) const { switch (Ty) { @@ -215,7 +215,7 @@ unsigned DIEValue::SizeOf(const AsmPrinter *AP) const { llvm_unreachable("Unknown DIE kind"); } -#ifndef NDEBUG +LLVM_DUMP_METHOD void DIEValue::print(raw_ostream &O) const { switch (Ty) { case isNone: @@ -228,10 +228,10 @@ void DIEValue::print(raw_ostream &O) const { } } +LLVM_DUMP_METHOD void DIEValue::dump() const { print(dbgs()); } -#endif //===----------------------------------------------------------------------===// // DIEInteger Implementation @@ -264,7 +264,8 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return; case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; case dwarf::DW_FORM_addr: - Size = Asm->getDataLayout().getPointerSize(); break; + Size = Asm->getPointerSize(); + break; case dwarf::DW_FORM_ref_addr: Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr); break; @@ -294,21 +295,21 @@ unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_GNU_addr_index: return getULEB128Size(Integer); case dwarf::DW_FORM_udata: return getULEB128Size(Integer); case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer); - case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize(); + case dwarf::DW_FORM_addr: + return AP->getPointerSize(); case dwarf::DW_FORM_ref_addr: if (AP->OutStreamer->getContext().getDwarfVersion() == 2) - return AP->getDataLayout().getPointerSize(); + return AP->getPointerSize(); return sizeof(int32_t); default: llvm_unreachable("DIE Value form not supported yet"); } } -#ifndef NDEBUG +LLVM_DUMP_METHOD void DIEInteger::print(raw_ostream &O) const { O << "Int: " << (int64_t)Integer << " 0x"; O.write_hex(Integer); } -#endif //===----------------------------------------------------------------------===// // DIEExpr Implementation @@ -326,12 +327,11 @@ unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; - return AP->getDataLayout().getPointerSize(); + return AP->getPointerSize(); } -#ifndef NDEBUG +LLVM_DUMP_METHOD void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; } -#endif //===----------------------------------------------------------------------===// // DIELabel Implementation @@ -352,12 +352,11 @@ unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == 
dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; - return AP->getDataLayout().getPointerSize(); + return AP->getPointerSize(); } -#ifndef NDEBUG +LLVM_DUMP_METHOD void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); } -#endif //===----------------------------------------------------------------------===// // DIEDelta Implementation @@ -375,14 +374,13 @@ unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; - return AP->getDataLayout().getPointerSize(); + return AP->getPointerSize(); } -#ifndef NDEBUG +LLVM_DUMP_METHOD void DIEDelta::print(raw_ostream &O) const { O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName(); } -#endif //===----------------------------------------------------------------------===// // DIEString Implementation @@ -431,11 +429,10 @@ unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { return DIEInteger(S.getOffset()).SizeOf(AP, Form); } -#ifndef NDEBUG +LLVM_DUMP_METHOD void DIEString::print(raw_ostream &O) const { O << "String: " << S.getString(); } -#endif //===----------------------------------------------------------------------===// // DIEEntry Implementation @@ -472,15 +469,14 @@ unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) { const DwarfDebug *DD = AP->getDwarfDebug(); assert(DD && "Expected Dwarf Debug info to be available"); if (DD->getDwarfVersion() == 2) - return AP->getDataLayout().getPointerSize(); + return AP->getPointerSize(); return sizeof(int32_t); } -#ifndef NDEBUG +LLVM_DUMP_METHOD void DIEEntry::print(raw_ostream &O) const { O << format("Die: 0x%lx", (long)(intptr_t)&Entry); } -#endif //===----------------------------------------------------------------------===// // DIETypeSignature Implementation @@ -491,11 +487,10 @@ void DIETypeSignature::EmitValue(const AsmPrinter *Asm, Asm->OutStreamer->EmitIntValue(Unit->getTypeSignature(), 8); } -#ifndef NDEBUG +LLVM_DUMP_METHOD void DIETypeSignature::print(raw_ostream &O) const { O << format("Type Unit: 0x%lx", Unit->getTypeSignature()); } -#endif //===----------------------------------------------------------------------===// // DIELoc Implementation @@ -505,7 +500,7 @@ void DIETypeSignature::print(raw_ostream &O) const { /// unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const { if (!Size) { - for (const auto &V : Values) + for (const auto &V : values()) Size += V.SizeOf(AP); } @@ -525,7 +520,7 @@ void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { Asm->EmitULEB128(Size); break; } - for (const auto &V : Values) + for (const auto &V : values()) V.EmitValue(Asm); } @@ -543,12 +538,10 @@ unsigned DIELoc::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { } } -#ifndef NDEBUG +LLVM_DUMP_METHOD void DIELoc::print(raw_ostream &O) const { - O << "ExprLoc: "; - DIE::print(O, 5); + printValues(O, *this, "ExprLoc", Size, 5); } -#endif //===----------------------------------------------------------------------===// // DIEBlock Implementation @@ -558,7 +551,7 @@ void DIELoc::print(raw_ostream &O) const { /// unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const { if (!Size) { - for (const auto &V : Values) + for (const auto &V : values()) Size += V.SizeOf(AP); } @@ -576,7 +569,7 @@ void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_block: 
Asm->EmitULEB128(Size); break; } - for (const auto &V : Values) + for (const auto &V : values()) V.EmitValue(Asm); } @@ -592,12 +585,10 @@ unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { } } -#ifndef NDEBUG +LLVM_DUMP_METHOD void DIEBlock::print(raw_ostream &O) const { - O << "Blk: "; - DIE::print(O, 5); + printValues(O, *this, "Blk", Size, 5); } -#endif //===----------------------------------------------------------------------===// // DIELocList Implementation @@ -608,7 +599,7 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; - return AP->getDataLayout().getPointerSize(); + return AP->getPointerSize(); } /// EmitValue - Emit label value. @@ -619,6 +610,5 @@ void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { AP->emitDwarfSymbolReference(Label, /*ForceOffset*/ DD->useSplitDwarf()); } -#ifndef NDEBUG +LLVM_DUMP_METHOD void DIELocList::print(raw_ostream &O) const { O << "LocList: " << Index; } -#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 5e60156fdfc9..02010654a6f4 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -470,38 +470,6 @@ void DIEHash::computeHash(const DIE &Die) { } /// This is based on the type signature computation given in section 7.27 of the -/// DWARF4 standard. It is the md5 hash of a flattened description of the DIE -/// with the exception that we are hashing only the context and the name of the -/// type. -uint64_t DIEHash::computeDIEODRSignature(const DIE &Die) { - - // Add the contexts to the hash. We won't be computing the ODR hash for - // function local types so it's safe to use the generic context hashing - // algorithm here. - // FIXME: If we figure out how to account for linkage in some way we could - // actually do this with a slight modification to the parent hash algorithm. - if (const DIE *Parent = Die.getParent()) - addParentContext(*Parent); - - // Add the current DIE information. - - // Add the DWARF tag of the DIE. - addULEB128(Die.getTag()); - - // Add the name of the type to the hash. - addString(getDIEStringAttr(Die, dwarf::DW_AT_name)); - - // Now get the result. - MD5::MD5Result Result; - Hash.final(Result); - - // ... take the least significant 8 bytes and return those. Our MD5 - // implementation always returns its results in little endian, swap bytes - // appropriately. - return support::endian::read64le(Result + 8); -} - -/// This is based on the type signature computation given in section 7.27 of the /// DWARF4 standard. It is an md5 hash of the flattened description of the DIE /// with the inclusion of the full CU and all top level CU entities. // TODO: Initialize the type chain at 0 instead of 1 for CU signatures. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h index 833ca0276fdb..44f0ce88523d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h @@ -84,9 +84,6 @@ class DIEHash { public: DIEHash(AsmPrinter *A = nullptr) : AP(A) {} - /// \brief Computes the ODR signature. - uint64_t computeDIEODRSignature(const DIE &Die); - /// \brief Computes the CU signature. 
uint64_t computeCUSignature(const DIE &Die); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h index afffa839a606..bbe53249a084 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -9,6 +9,8 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H + +#include "DebugLocStream.h" #include "llvm/ADT/SmallString.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" @@ -17,7 +19,6 @@ namespace llvm { class AsmPrinter; -class DebugLocStream; /// \brief This struct describes location entries emitted in the .debug_loc /// section. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index f8cdde203187..4ad3e1867328 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -41,7 +41,7 @@ void DwarfAccelTable::AddName(DwarfStringPoolEntryRef Name, const DIE *die, DIEs.Values.push_back(new (Allocator) HashDataContents(die, Flags)); } -void DwarfAccelTable::ComputeBucketCount(void) { +void DwarfAccelTable::ComputeBucketCount() { // First get the number of unique hashes. std::vector<uint32_t> uniques(Data.size()); for (size_t i = 0, e = Data.size(); i < e; ++i) diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 2c212c7ecee1..6665c16159a0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -78,12 +78,11 @@ void DwarfCFIException::endModule() { return; // Emit references to all used personality functions - const std::vector<const Function*> &Personalities = MMI->getPersonalities(); - for (size_t i = 0, e = Personalities.size(); i != e; ++i) { - if (!Personalities[i]) + for (const Function *Personality : MMI->getPersonalities()) { + if (!Personality) continue; - MCSymbol *Sym = Asm->getSymbol(Personalities[i]); - TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->TM, Sym); + MCSymbol *Sym = Asm->getSymbol(Personality); + TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->getDataLayout(), Sym); } } @@ -108,7 +107,6 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { const Function *Per = nullptr; if (F->hasPersonalityFn()) Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); - assert(!MMI->getPersonality() || Per == MMI->getPersonality()); // Emit a personality function even when there are no landing pads bool forceEmitPersonality = diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index fc54a2925beb..725063a8177b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -151,28 +151,33 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( DIELoc *Loc = new (DIEValueAllocator) DIELoc; const MCSymbol *Sym = Asm->getSymbol(Global); if (Global->isThreadLocal()) { - // FIXME: Make this work with -gsplit-dwarf. 
- unsigned PointerSize = Asm->getDataLayout().getPointerSize(); - assert((PointerSize == 4 || PointerSize == 8) && - "Add support for other sizes if necessary"); - // Based on GCC's support for TLS: - if (!DD->useSplitDwarf()) { - // 1) Start with a constNu of the appropriate pointer size - addUInt(*Loc, dwarf::DW_FORM_data1, - PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); - // 2) containing the (relocated) offset of the TLS variable - // within the module's TLS block. - addExpr(*Loc, dwarf::DW_FORM_udata, - Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); + if (Asm->TM.Options.EmulatedTLS) { + // TODO: add debug info for emulated thread local mode. } else { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); - addUInt(*Loc, dwarf::DW_FORM_udata, - DD->getAddressPool().getIndex(Sym, /* TLS */ true)); + // FIXME: Make this work with -gsplit-dwarf. + unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); + // Based on GCC's support for TLS: + if (!DD->useSplitDwarf()) { + // 1) Start with a constNu of the appropriate pointer size + addUInt(*Loc, dwarf::DW_FORM_data1, PointerSize == 4 + ? dwarf::DW_OP_const4u + : dwarf::DW_OP_const8u); + // 2) containing the (relocated) offset of the TLS variable + // within the module's TLS block. + addExpr(*Loc, dwarf::DW_FORM_udata, + Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); + } else { + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(*Loc, dwarf::DW_FORM_udata, + DD->getAddressPool().getIndex(Sym, /* TLS */ true)); + } + // 3) followed by an OP to make the debugger do a TLS lookup. + addUInt(*Loc, dwarf::DW_FORM_data1, + DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address + : dwarf::DW_OP_form_tls_address); } - // 3) followed by an OP to make the debugger do a TLS lookup. - addUInt(*Loc, dwarf::DW_FORM_data1, - DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address - : dwarf::DW_OP_form_tls_address); } else { DD->addArangeLabel(SymbolCU(this, Sym)); addOpAddress(*Loc, Sym); @@ -338,9 +343,9 @@ void DwarfCompileUnit::constructScopeDIE( // Skip imported directives in gmlt-like data. if (!includeMinimalInlineScopes()) { // There is no need to emit empty lexical block DIE. - for (const auto &E : DD->findImportedEntitiesForScope(DS)) + for (const auto *IE : ImportedEntities[DS]) Children.push_back( - constructImportedEntityDIE(cast<DIImportedEntity>(E.second))); + constructImportedEntityDIE(cast<DIImportedEntity>(IE))); } // If there are only other scopes as children, put them directly in the @@ -435,6 +440,9 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, getOrCreateSourceID(IA->getFilename(), IA->getDirectory())); addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine()); + if (IA->getDiscriminator()) + addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None, + IA->getDiscriminator()); // Add name to the name table, we do this here because we're guaranteed // to have concrete versions of our DW_TAG_inlined_subprogram nodes. 
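[Editor's note, illustrative only — not part of the commit.] The TLS hunk above builds a DWARF location expression in three steps: a constant opcode sized to the pointer width, the (relocated) offset of the TLS variable within the module's TLS block, and finally an opcode that tells the debugger to do a TLS lookup. A rough standalone sketch of that three-step layout, with opcode constants written out as plain values (treat them as illustrative stand-ins, not LLVM's dwarf:: enums):

#include <cstdint>
#include <iostream>
#include <vector>

// Stand-ins for the DWARF opcodes referenced in the hunk above.
enum : uint8_t {
  OP_const4u = 0x0c,
  OP_const8u = 0x0e,
  OP_form_tls_address = 0x9b,     // standard opcode (DWARF 3+)
  OP_GNU_push_tls_address = 0xe0, // GNU variant
};

// 1) constant of the appropriate pointer size,
// 2) offset of the variable within the TLS block (little-endian here),
// 3) opcode making the debugger perform the TLS lookup.
std::vector<uint8_t> tlsLocation(unsigned PointerSize, uint64_t TLSOffset,
                                 bool UseGNUOpcode) {
  std::vector<uint8_t> Expr;
  Expr.push_back(PointerSize == 4 ? OP_const4u : OP_const8u);
  for (unsigned i = 0; i != PointerSize; ++i)
    Expr.push_back(uint8_t(TLSOffset >> (8 * i)));
  Expr.push_back(UseGNUOpcode ? OP_GNU_push_tls_address : OP_form_tls_address);
  return Expr;
}

int main() {
  for (uint8_t B : tlsLocation(8, 0x40, /*UseGNUOpcode=*/false))
    std::cout << std::hex << unsigned(B) << ' ';
  std::cout << '\n';
}

In the real code the offset is a relocatable symbol reference (or an address-pool index under split DWARF) rather than a literal, and the whole thing is skipped under EmulatedTLS.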
@@ -517,8 +525,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, unsigned FrameReg = 0; const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - assert(Expr != DV.getExpression().end() && - "Wrong number of expressions"); + assert(Expr != DV.getExpression().end() && "Wrong number of expressions"); DwarfExpr.AddMachineRegIndirect(FrameReg, Offset); DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end()); ++Expr; @@ -597,8 +604,8 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, return ObjectPointer; } -void -DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { +void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( + LexicalScope *Scope) { DIE *&AbsDef = DU->getAbstractSPDies()[Scope->getScopeNode()]; if (AbsDef) return; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 509c9432bcbf..2e2846790cc1 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -39,6 +39,12 @@ class DwarfCompileUnit : public DwarfUnit { /// The start of the unit within its section. MCSymbol *LabelBegin; + typedef llvm::SmallVector<const MDNode *, 8> ImportedEntityList; + typedef llvm::DenseMap<const MDNode *, ImportedEntityList> + ImportedEntityMap; + + ImportedEntityMap ImportedEntities; + /// GlobalNames - A map of globally visible named entities for this unit. StringMap<const DIE *> GlobalNames; @@ -98,6 +104,10 @@ public: unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override; + void addImportedEntity(const DIImportedEntity* IE) { + ImportedEntities[IE->getScope()].push_back(IE); + } + /// addRange - Add an address range to the list of ranges for this unit. 
void addRange(RangeSpan Range); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 7d03a3930d7d..3466f3469f1c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/ValueHandle.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" @@ -104,6 +105,14 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, clEnumVal(Disable, "Disabled"), clEnumValEnd), cl::init(Default)); +static cl::opt<DefaultOnOff> +DwarfLinkageNames("dwarf-linkage-names", cl::Hidden, + cl::desc("Emit DWARF linkage-name attributes."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + static const char *const DWARFGroupName = "DWARF Emission"; static const char *const DbgTimerName = "DWARF Debug Writer"; @@ -176,9 +185,9 @@ const DIType *DbgVariable::getType() const { if (tag == dwarf::DW_TAG_pointer_type) subType = resolve(cast<DIDerivedType>(Ty)->getBaseType()); - auto Elements = cast<DICompositeTypeBase>(subType)->getElements(); + auto Elements = cast<DICompositeType>(subType)->getElements(); for (unsigned i = 0, N = Elements.size(); i < N; ++i) { - auto *DT = cast<DIDerivedTypeBase>(Elements[i]); + auto *DT = cast<DIDerivedType>(Elements[i]); if (getName() == DT->getName()) return resolve(DT->getBaseType()); } @@ -194,45 +203,67 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = { DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) : Asm(A), MMI(Asm->MMI), DebugLocs(A->OutStreamer->isVerboseAsm()), PrevLabel(nullptr), InfoHolder(A, "info_string", DIEValueAllocator), - UsedNonDefaultText(false), SkeletonHolder(A, "skel_string", DIEValueAllocator), IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()), - IsPS4(Triple(A->getTargetTriple()).isPS4()), AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)), AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)), AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)), - AccelTypes(TypeAtoms) { + AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) { CurFn = nullptr; CurMI = nullptr; + Triple TT(Asm->getTargetTriple()); + + // Make sure we know our "debugger tuning." The target option takes + // precedence; fall back to triple-based defaults. + if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default) + DebuggerTuning = Asm->TM.Options.DebuggerTuning; + else if (IsDarwin || TT.isOSFreeBSD()) + DebuggerTuning = DebuggerKind::LLDB; + else if (TT.isPS4CPU()) + DebuggerTuning = DebuggerKind::SCE; + else + DebuggerTuning = DebuggerKind::GDB; - // Turn on accelerator tables for Darwin by default, pubnames by - // default for non-Darwin/PS4, and handle split dwarf. + // Turn on accelerator tables for LLDB by default. if (DwarfAccelTables == Default) - HasDwarfAccelTables = IsDarwin; + HasDwarfAccelTables = tuneForLLDB(); else HasDwarfAccelTables = DwarfAccelTables == Enable; + // Handle split DWARF. Off by default for now. if (SplitDwarf == Default) HasSplitDwarf = false; else HasSplitDwarf = SplitDwarf == Enable; + // Pubnames/pubtypes on by default for GDB. 
if (DwarfPubSections == Default) - HasDwarfPubSections = !IsDarwin && !IsPS4; + HasDwarfPubSections = tuneForGDB(); else HasDwarfPubSections = DwarfPubSections == Enable; + // SCE does not use linkage names. + if (DwarfLinkageNames == Default) + UseLinkageNames = !tuneForSCE(); + else + UseLinkageNames = DwarfLinkageNames == Enable; + unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion; DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber : MMI->getModule()->getDwarfVersion(); + // Use dwarf 4 by default if nothing is requested. + DwarfVersion = DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION; - // Darwin and PS4 use the standard TLS opcode (defined in DWARF 3). - // Everybody else uses GNU's. - UseGNUTLSOpcode = !(IsDarwin || IsPS4) || DwarfVersion < 3; + // Work around a GDB bug. GDB doesn't support the standard opcode; + // SCE doesn't support GNU's; LLDB prefers the standard opcode, which + // is defined as of DWARF 3. + // See GDB bug 11616 - DW_OP_form_tls_address is unimplemented + // https://sourceware.org/bugzilla/show_bug.cgi?id=11616 + UseGNUTLSOpcode = tuneForGDB() || DwarfVersion < 3; Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion); @@ -300,18 +331,6 @@ void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) { } } -/// isSubprogramContext - Return true if Context is either a subprogram -/// or another context nested inside a subprogram. -bool DwarfDebug::isSubprogramContext(const MDNode *Context) { - if (!Context) - return false; - if (isa<DISubprogram>(Context)) - return true; - if (auto *T = dyn_cast<DIType>(Context)) - return isSubprogramContext(resolve(T->getScope())); - return false; -} - /// Check whether we should create a DIE for the given Scope, return true /// if we don't create a DIE (the corresponding DIE is null). bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) { @@ -416,6 +435,16 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) { else NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection()); + if (DIUnit->getDWOId()) { + // This CU is either a clang module DWO or a skeleton CU. + NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, + DIUnit->getDWOId()); + if (!DIUnit->getSplitDebugFilename().empty()) + // This is a prefabricated skeleton CU. + NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name, + DIUnit->getSplitDebugFilename()); + } + CUMap.insert(std::make_pair(DIUnit, &NewCU)); CUDieMap.insert(std::make_pair(&Die, &NewCU)); return NewCU; @@ -436,8 +465,6 @@ void DwarfDebug::beginModule() { const Module *M = MMI->getModule(); - FunctionDIs = makeSubprogramMap(*M); - NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (!CU_Nodes) return; @@ -449,12 +476,7 @@ void DwarfDebug::beginModule() { auto *CUNode = cast<DICompileUnit>(N); DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode); for (auto *IE : CUNode->getImportedEntities()) - ScopesWithImportedEntities.push_back(std::make_pair(IE->getScope(), IE)); - // Stable sort to preserve the order of appearance of imported entities. - // This is to avoid out-of-order processing of interdependent declarations - // within the same scope, e.g. 
{ namespace A = base; namespace B = A; } - std::stable_sort(ScopesWithImportedEntities.begin(), - ScopesWithImportedEntities.end(), less_first()); + CU.addImportedEntity(IE); for (auto *GV : CUNode->getGlobalVariables()) CU.getOrCreateGlobalVariableDIE(GV); for (auto *SP : CUNode->getSubprograms()) @@ -467,7 +489,10 @@ void DwarfDebug::beginModule() { for (auto *Ty : CUNode->getRetainedTypes()) { // The retained types array by design contains pointers to // MDNodes rather than DIRefs. Unique them here. - CU.getOrCreateTypeDIE(cast<DIType>(resolve(Ty->getRef()))); + DIType *RT = cast<DIType>(resolve(Ty->getRef())); + if (!RT->isExternalTypeRef()) + // There is no point in force-emitting a forward declaration. + CU.getOrCreateTypeDIE(RT); } // Emit imported_modules last so that the relevant context is already // available. @@ -1061,12 +1086,8 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { for (const auto &MBB : *MF) for (const auto &MI : MBB) if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && - MI.getDebugLoc()) { - // Did the target forget to set the FrameSetup flag for CFI insns? - assert(!MI.isCFIInstruction() && - "First non-frame-setup instruction is a CFI instruction."); + MI.getDebugLoc()) return MI.getDebugLoc(); - } return DebugLoc(); } @@ -1079,8 +1100,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return; - auto DI = FunctionDIs.find(MF->getFunction()); - if (DI == FunctionDIs.end()) + auto DI = MF->getFunction()->getSubprogram(); + if (!DI) return; // Grab the lexical scopes for the function, if we don't have any of those @@ -1127,7 +1148,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // The first mention of a function argument gets the CurrentFnBegin // label, so arguments are visible when breaking at function entry. const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable(); - if (DIVar->getTag() == dwarf::DW_TAG_arg_variable && + if (DIVar->isParameter() && getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) { LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin(); if (Ranges.front().first->getDebugExpression()->isBitPiece()) { @@ -1171,7 +1192,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { "endFunction should be called with the same function as beginFunction"); if (!MMI->hasDebugInfo() || LScopes.empty() || - !FunctionDIs.count(MF->getFunction())) { + !MF->getFunction()->getSubprogram()) { // If we don't have a lexical scope for this function then there will // be a hole in the range information. Keep note of this by setting the // previously used section to nullptr. @@ -1863,7 +1884,7 @@ void DwarfDebug::emitDebugLineDWO() { assert(useSplitDwarf() && "No split dwarf?"); Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLineDWOSection()); - SplitTypeUnitFileTable.Emit(*Asm->OutStreamer); + SplitTypeUnitFileTable.Emit(*Asm->OutStreamer, MCDwarfLineTableParams()); } // Emit the .debug_str.dwo section for separated dwarf. This contains the @@ -1884,7 +1905,7 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) { return &SplitTypeUnitFileTable; } -static uint64_t makeTypeSignature(StringRef Identifier) { +uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) { MD5 Hash; Hash.update(Identifier); // ... take the least significant 8 bytes and return those. 
Our MD5 diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 01f34c6eb81c..4c613a905450 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -33,6 +33,7 @@ #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/Allocator.h" +#include "llvm/Target/TargetOptions.h" #include <memory> namespace llvm { @@ -49,24 +50,6 @@ class DwarfUnit; class MachineModuleInfo; //===----------------------------------------------------------------------===// -/// This class is used to record source line correspondence. -class SrcLineInfo { - unsigned Line; // Source line number. - unsigned Column; // Source column. - unsigned SourceID; // Source ID number. - MCSymbol *Label; // Label in code ID number. -public: - SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label) - : Line(L), Column(C), SourceID(S), Label(label) {} - - // Accessors - unsigned getLine() const { return Line; } - unsigned getColumn() const { return Column; } - unsigned getSourceID() const { return SourceID; } - MCSymbol *getLabel() const { return Label; } -}; - -//===----------------------------------------------------------------------===// /// This class is used to track local variable information. /// /// Variables can be created from allocas, in which case they're generated from @@ -127,14 +110,14 @@ public: // Accessors. const DILocalVariable *getVariable() const { return Var; } const DILocation *getInlinedAt() const { return IA; } - const ArrayRef<const DIExpression *> getExpression() const { return Expr; } + ArrayRef<const DIExpression *> getExpression() const { return Expr; } void setDIE(DIE &D) { TheDIE = &D; } DIE *getDIE() const { return TheDIE; } void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; } unsigned getDebugLocListIndex() const { return DebugLocListIndex; } StringRef getName() const { return Var->getName(); } const MachineInstr *getMInsn() const { return MInsn; } - const ArrayRef<int> getFrameIndex() const { return FrameIndex; } + ArrayRef<int> getFrameIndex() const { return FrameIndex; } void addMMIEntry(const DbgVariable &V) { assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry"); @@ -156,7 +139,8 @@ public: // Translate tag to proper Dwarf tag. dwarf::Tag getTag() const { - if (Var->getTag() == dwarf::DW_TAG_arg_variable) + // FIXME: Why don't we just infer this tag and store it all along? + if (Var->isParameter()) return dwarf::DW_TAG_formal_parameter; return dwarf::DW_TAG_variable; @@ -282,11 +266,6 @@ class DwarfDebug : public AsmPrinterHandler { /// Holders for the various debug information flags that we might need to /// have exposed. See accessor functions below for description. - /// Holder for imported entities. - typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32> - ImportedEntityMap; - ImportedEntityMap ScopesWithImportedEntities; - /// Map from MDNodes for user-defined types to the type units that /// describe them. DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits; @@ -298,16 +277,12 @@ class DwarfDebug : public AsmPrinterHandler { /// Whether to emit the pubnames/pubtypes sections. bool HasDwarfPubSections; - /// Whether or not to use AT_ranges for compilation units. - bool HasCURanges; - - /// Whether we emitted a function into a section other than the - /// default text. - bool UsedNonDefaultText; - /// Whether to use the GNU TLS opcode (instead of the standard opcode). 
bool UseGNUTLSOpcode; + /// Whether to emit DW_AT_[MIPS_]linkage_name. + bool UseLinkageNames; + /// Version of dwarf we're emitting. unsigned DwarfVersion; @@ -338,7 +313,6 @@ class DwarfDebug : public AsmPrinterHandler { /// True iff there are multiple CUs in this module. bool SingleCU; bool IsDarwin; - bool IsPS4; AddressPool AddrPool; @@ -347,7 +321,8 @@ class DwarfDebug : public AsmPrinterHandler { DwarfAccelTable AccelNamespace; DwarfAccelTable AccelTypes; - DenseMap<const Function *, DISubprogram *> FunctionDIs; + // Identify a debugger for "tuning" the debug info. + DebuggerKind DebuggerTuning; MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &); @@ -372,12 +347,6 @@ class DwarfDebug : public AsmPrinterHandler { /// Construct a DIE for this abstract scope. void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); - /// Compute the size and offset of a DIE given an incoming Offset. - unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); - - /// Compute the size and offset of all the DIEs. - void computeSizeAndOffsets(); - /// Collect info for variables that were optimized out. void collectDeadVariables(); @@ -443,9 +412,6 @@ class DwarfDebug : public AsmPrinterHandler { /// Emit visible names into a debug ranges section. void emitDebugRanges(); - /// Emit inline info using custom format. - void emitDebugInlineInfo(); - /// DWARF 5 Experimental Split Dwarf Emitters /// Initialize common features of skeleton units. @@ -456,10 +422,6 @@ class DwarfDebug : public AsmPrinterHandler { /// section. DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU); - /// Construct the split debug info compile unit for the debug info - /// section. - DwarfTypeUnit &constructSkeletonTU(DwarfTypeUnit &TU); - /// Emit the debug info dwo section. void emitDebugInfoDWO(); @@ -544,6 +506,9 @@ public: /// Process end of an instruction. void endInstruction() override; + /// Perform an MD5 checksum of \p Identifier and return the lower 64 bits. + static uint64_t makeTypeSignature(StringRef Identifier); + /// Add a DIE to the set of types that we're going to pull into /// type units. void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, @@ -558,10 +523,22 @@ public: SymSize[Sym] = Size; } + /// Returns whether to emit DW_AT_[MIPS_]linkage_name. + bool useLinkageNames() const { return UseLinkageNames; } + /// Returns whether to use DW_OP_GNU_push_tls_address, instead of the /// standard DW_OP_form_tls_address opcode bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; } + /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger. + /// + /// Returns whether we are "tuning" for a given debugger. + /// @{ + bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; } + bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; } + bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; } + /// @} + // Experimental DWARF5 features. /// Returns whether or not to emit tables that dwarf consumers can @@ -604,9 +581,6 @@ public: DwarfCompileUnit *lookupUnit(const DIE *CU) const { return CUDieMap.lookup(CU); } - /// isSubprogramContext - Return true if Context is either a subprogram - /// or another context nested inside a subprogram. 
- bool isSubprogramContext(const MDNode *Context); void addSubprogramNames(const DISubprogram *SP, DIE &Die); @@ -622,14 +596,6 @@ public: const MachineFunction *getCurrentFunction() const { return CurFn; } - iterator_range<ImportedEntityMap::const_iterator> - findImportedEntitiesForScope(const MDNode *Scope) const { - return make_range(std::equal_range( - ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(), - std::pair<const MDNode *, const MDNode *>(Scope, nullptr), - less_first())); - } - /// A helper function to check whether the DIE for a given Scope is /// going to be null. bool isLexicalScopeDIENull(LexicalScope *Scope); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index a2799b8d6300..7b5b831da166 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -211,12 +211,15 @@ bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr, return AddMachineRegPiece(MachineReg, SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); } - case dwarf::DW_OP_plus: { - // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset]. + case dwarf::DW_OP_plus: + case dwarf::DW_OP_minus: { + // [DW_OP_reg,Offset,DW_OP_plus, DW_OP_deref] --> [DW_OP_breg, Offset]. + // [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset]. auto N = I.getNext(); if (N != E && N->getOp() == dwarf::DW_OP_deref) { unsigned Offset = I->getArg(0); - ValidReg = AddMachineRegIndirect(MachineReg, Offset); + ValidReg = AddMachineRegIndirect( + MachineReg, I->getOp() == dwarf::DW_OP_plus ? Offset : -Offset); std::advance(I, 2); break; } else @@ -255,6 +258,12 @@ void DwarfExpression::AddExpression(DIExpression::expr_op_iterator I, EmitOp(dwarf::DW_OP_plus_uconst); EmitUnsigned(I->getArg(0)); break; + case dwarf::DW_OP_minus: + // There is no OP_minus_uconst. 
+ EmitOp(dwarf::DW_OP_constu); + EmitUnsigned(I->getArg(0)); + EmitOp(dwarf::DW_OP_minus); + break; case dwarf::DW_OP_deref: EmitOp(dwarf::DW_OP_deref); break; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 355582298e5e..d75fea5d8c8a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -192,18 +192,19 @@ void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) { DIEInteger(1)); } -void DwarfUnit::addUInt(DIE &Die, dwarf::Attribute Attribute, +void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, uint64_t Integer) { if (!Form) Form = DIEInteger::BestForm(false, Integer); Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer)); } -void DwarfUnit::addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer) { +void DwarfUnit::addUInt(DIEValueList &Block, dwarf::Form Form, + uint64_t Integer) { addUInt(Block, (dwarf::Attribute)0, Form, Integer); } -void DwarfUnit::addSInt(DIE &Die, dwarf::Attribute Attribute, +void DwarfUnit::addSInt(DIEValueList &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, int64_t Integer) { if (!Form) Form = DIEInteger::BestForm(true, Integer); @@ -222,9 +223,10 @@ void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute, DIEString(DU->getStringPool().getEntry(*Asm, String))); } -DIE::value_iterator DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute, - dwarf::Form Form, - const MCSymbol *Label) { +DIEValueList::value_iterator DwarfUnit::addLabel(DIEValueList &Die, + dwarf::Attribute Attribute, + dwarf::Form Form, + const MCSymbol *Label) { return Die.addValue(DIEValueAllocator, Attribute, Form, DIELabel(Label)); } @@ -277,6 +279,13 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) { dwarf::DW_FORM_ref_sig8, DIETypeSignature(Type)); } +void DwarfUnit::addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute, + StringRef Identifier) { + uint64_t Signature = DD->makeTypeSignature(Identifier); + Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_ref_sig8, + DIEInteger(Signature)); +} + void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry) { const DIE *DieCU = Die.getUnitOrNull(); @@ -292,8 +301,6 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, } DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) { - assert(Tag != dwarf::DW_TAG_auto_variable && - Tag != dwarf::DW_TAG_arg_variable); DIE &Die = Parent.addChild(DIE::get(DIEValueAllocator, (dwarf::Tag)Tag)); if (N) insertDIE(N, &Die); @@ -445,7 +452,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // Find the __forwarding field and the variable field in the __Block_byref // struct. - DINodeArray Fields = cast<DICompositeTypeBase>(TmpTy)->getElements(); + DINodeArray Fields = cast<DICompositeType>(TmpTy)->getElements(); const DIDerivedType *varField = nullptr; const DIDerivedType *forwardingField = nullptr; @@ -506,34 +513,35 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, /// Return true if type encoding is unsigned. static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) { - if (auto *DTy = dyn_cast<DIDerivedTypeBase>(Ty)) { + if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { + // FIXME: Enums without a fixed underlying type have unknown signedness + // here, leading to incorrectly emitted constants. 
+ if (CTy->getTag() == dwarf::DW_TAG_enumeration_type) + return false; + + // (Pieces of) aggregate types that get hacked apart by SROA may be + // represented by a constant. Encode them as unsigned bytes. + return true; + } + + if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) { dwarf::Tag T = (dwarf::Tag)Ty->getTag(); // Encode pointer constants as unsigned bytes. This is used at least for // null pointer constant emission. - // (Pieces of) aggregate types that get hacked apart by SROA may also be - // represented by a constant. Encode them as unsigned bytes. // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed // here, but accept them for now due to a bug in SROA producing bogus // dbg.values. - if (T == dwarf::DW_TAG_array_type || - T == dwarf::DW_TAG_class_type || - T == dwarf::DW_TAG_pointer_type || + if (T == dwarf::DW_TAG_pointer_type || T == dwarf::DW_TAG_ptr_to_member_type || T == dwarf::DW_TAG_reference_type || - T == dwarf::DW_TAG_rvalue_reference_type || - T == dwarf::DW_TAG_structure_type || - T == dwarf::DW_TAG_union_type) + T == dwarf::DW_TAG_rvalue_reference_type) return true; assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || T == dwarf::DW_TAG_volatile_type || - T == dwarf::DW_TAG_restrict_type || - T == dwarf::DW_TAG_enumeration_type); - if (DITypeRef Deriv = DTy->getBaseType()) - return isUnsignedDIType(DD, DD->resolve(Deriv)); - // FIXME: Enums without a fixed underlying type have unknown signedness - // here, leading to incorrectly emitted constants. - assert(DTy->getTag() == dwarf::DW_TAG_enumeration_type); - return false; + T == dwarf::DW_TAG_restrict_type); + DITypeRef Deriv = DTy->getBaseType(); + assert(Deriv && "Expected valid base type"); + return isUnsignedDIType(DD, DD->resolve(Deriv)); } auto *BTy = cast<DIBasicType>(Ty); @@ -659,7 +667,7 @@ void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) { } void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) { - if (!LinkageName.empty()) + if (!LinkageName.empty() && DD->useLinkageNames()) addString(Die, DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name : dwarf::DW_AT_MIPS_linkage_name, @@ -685,6 +693,8 @@ DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) { return getOrCreateNameSpace(NS); if (auto *SP = dyn_cast<DISubprogram>(Context)) return getOrCreateSubprogramDIE(SP); + if (auto *M = dyn_cast<DIModule>(Context)) + return getOrCreateModule(M); return getDIE(Context); } @@ -700,7 +710,8 @@ DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) { constructTypeDIE(TyDIE, cast<DICompositeType>(Ty)); - updateAcceleratorTables(Context, Ty, TyDIE); + if (!Ty->isExternalTypeRef()) + updateAcceleratorTables(Context, Ty, TyDIE); return &TyDIE; } @@ -753,7 +764,7 @@ void DwarfUnit::updateAcceleratorTables(const DIScope *Context, const DIType *Ty, const DIE &TyDIE) { if (!Ty->getName().empty() && !Ty->isForwardDecl()) { bool IsImplementation = 0; - if (auto *CT = dyn_cast<DICompositeTypeBase>(Ty)) { + if (auto *CT = dyn_cast<DICompositeType>(Ty)) { // A runtime language of 0 actually means C/C++ and that any // non-negative value is some version of Objective-C/C++. IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete(); @@ -795,8 +806,7 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const { // Reverse iterate over our list to go from the outermost construct to the // innermost. 
- for (auto I = Parents.rbegin(), E = Parents.rend(); I != E; ++I) { - const DIScope *Ctx = *I; + for (const DIScope *Ctx : make_range(Parents.rbegin(), Parents.rend())) { StringRef Name = Ctx->getName(); if (Name.empty() && isa<DINamespace>(Ctx)) Name = "(anonymous namespace)"; @@ -843,7 +853,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { // Add size if non-zero (derived types might be zero-sized.) if (Size && Tag != dwarf::DW_TAG_pointer_type - && Tag != dwarf::DW_TAG_ptr_to_member_type) + && Tag != dwarf::DW_TAG_ptr_to_member_type + && Tag != dwarf::DW_TAG_reference_type + && Tag != dwarf::DW_TAG_rvalue_reference_type) addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); if (Tag == dwarf::DW_TAG_ptr_to_member_type) @@ -899,6 +911,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) { } void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { + if (CTy->isExternalTypeRef()) { + StringRef Identifier = CTy->getIdentifier(); + assert(!Identifier.empty() && "external type ref without identifier"); + addFlag(Buffer, dwarf::DW_AT_declaration); + return addDIETypeSignature(Buffer, dwarf::DW_AT_signature, Identifier); + } + // Add name if not anonymous or intermediate type. StringRef Name = CTy->getName(); @@ -1134,6 +1153,14 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, "definition DIE was created in " "getOrCreateSubprogramDIE"); DeclLinkageName = SPDecl->getLinkageName(); + unsigned DeclID = + getOrCreateSourceID(SPDecl->getFilename(), SPDecl->getDirectory()); + unsigned DefID = getOrCreateSourceID(SP->getFilename(), SP->getDirectory()); + if (DeclID != DefID) + addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID); + + if (SP->getLine() != SPDecl->getLine()) + addUInt(SPDie, dwarf::DW_AT_decl_line, None, SP->getLine()); } // Add function template parameters. @@ -1180,11 +1207,10 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, Language == dwarf::DW_LANG_ObjC)) addFlag(SPDie, dwarf::DW_AT_prototyped); - const DISubroutineType *SPTy = SP->getType(); - assert(SPTy->getTag() == dwarf::DW_TAG_subroutine_type && - "the type of a subprogram should be a subroutine"); + DITypeRefArray Args; + if (const DISubroutineType *SPTy = SP->getType()) + Args = SPTy->getTypeArray(); - auto Args = SPTy->getTypeArray(); // Add a return type. If this is a type like a C/C++ void type we don't add a // return type. if (Args.size()) diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 44d9d2245dda..82760bf21839 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -113,13 +113,6 @@ protected: DwarfUnit(unsigned UID, dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); - /// Add a string attribute data and value. - /// - /// This is guaranteed to be in the local string pool instead of indirected. - void addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); - - void addIndexedString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); - bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie); public: @@ -162,9 +155,6 @@ public: virtual void addGlobalType(const DIType *Ty, const DIE &Die, const DIScope *Context) {} - /// Add a new name to the namespace accelerator table. 
- void addAccelNamespace(StringRef Name, const DIE &Die); - /// Returns the DIE map slot for the specified debug variable. /// /// We delegate the request to DwarfDebug when the MDNode can be part of the @@ -186,14 +176,14 @@ public: void addFlag(DIE &Die, dwarf::Attribute Attribute); /// Add an unsigned integer attribute data and value. - void addUInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, - uint64_t Integer); + void addUInt(DIEValueList &Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, uint64_t Integer); - void addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer); + void addUInt(DIEValueList &Block, dwarf::Form Form, uint64_t Integer); /// Add an signed integer attribute data and value. - void addSInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, - int64_t Integer); + void addSInt(DIEValueList &Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, int64_t Integer); void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer); @@ -206,8 +196,10 @@ public: void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); /// Add a Dwarf label attribute data and value. - DIE::value_iterator addLabel(DIE &Die, dwarf::Attribute Attribute, - dwarf::Form Form, const MCSymbol *Label); + DIEValueList::value_iterator addLabel(DIEValueList &Die, + dwarf::Attribute Attribute, + dwarf::Form Form, + const MCSymbol *Label); void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label); @@ -228,7 +220,11 @@ public: /// Add a DIE attribute data and value. void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry); + /// Add a type's DW_AT_signature and set the declaration flag. void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type); + /// Add an attribute containing the type signature for a unique identifier. + void addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute, + StringRef Identifier); /// Add block data. void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 49ef8d3ddc8f..e24dcb1bffd4 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -662,9 +662,8 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { Entry = TypeInfos.size(); } - for (std::vector<const GlobalValue *>::const_reverse_iterator - I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { - const GlobalValue *GV = *I; + for (const GlobalValue *GV : make_range(TypeInfos.rbegin(), + TypeInfos.rend())) { if (VerboseAsm) Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--)); Asm->EmitTTypeReference(GV, TTypeEncoding); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h index e42e082acbf9..c6a0e9d0524c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -76,10 +76,6 @@ protected: SmallVectorImpl<ActionEntry> &Actions, SmallVectorImpl<unsigned> &FirstActions); - /// Return `true' if this is a call to a function marked `nounwind'. Return - /// `false' otherwise. 
- bool callToNoUnwindFunction(const MachineInstr *MI); - void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads, RangeMapType &PadMap); @@ -131,6 +127,10 @@ public: void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} void beginInstruction(const MachineInstr *MI) override {} void endInstruction() override {} + + /// Return `true' if this is a call to a function marked `nounwind'. Return + /// `false' otherwise. + static bool callToNoUnwindFunction(const MachineInstr *MI); }; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index eb9e4c10daf4..6a023b998b32 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -48,7 +48,7 @@ void llvm::linkErlangGCPrinter() {} void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) { MCStreamer &OS = *AP.OutStreamer; - unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); + unsigned IntPtrSize = M.getDataLayout().getPointerSize(); // Put this in a custom .note section. OS.SwitchSection( diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 2ceec61ab5ca..c09ef6adea69 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -93,7 +93,7 @@ void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info, /// void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) { - unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); + unsigned IntPtrSize = M.getDataLayout().getPointerSize(); AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection()); EmitCamlGlobal(M, AP, "code_end"); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index 6610ac78f8c4..c2c0f84e5c92 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -27,15 +27,15 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) { auto *Scope = cast<DIScope>(S); StringRef Dir = Scope->getDirectory(), Filename = Scope->getFilename(); - char *&Result = DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)]; - if (Result) - return Result; + std::string &Filepath = + DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)]; + if (!Filepath.empty()) + return Filepath; // Clang emits directory and relative filename info into the IR, but CodeView // operates on full paths. We could change Clang to emit full paths too, but // that would increase the IR size and probably not needed for other users. // For now, just concatenate and canonicalize the path here. 
- std::string Filepath; if (Filename.find(':') == 1) Filepath = Filename; else @@ -74,8 +74,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) { while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos) Filepath.erase(Cursor, 1); - Result = strdup(Filepath.c_str()); - return StringRef(Result); + return Filepath; } void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL, @@ -253,7 +252,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { } FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd; - // Emit a line table subsection, requred to do PC-to-file:line lookup. + // Emit a line table subsection, required to do PC-to-file:line lookup. Asm->OutStreamer->AddComment("Line table subsection for " + Twine(FuncName)); Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION); MCSymbol *LineTableBegin = Asm->MMI->getContext().createTempSymbol(), diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h index 43d1a432712e..78068e07c16f 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h @@ -98,7 +98,7 @@ class LLVM_LIBRARY_VISIBILITY WinCodeViewLineTables : public AsmPrinterHandler { } } FileNameRegistry; - typedef std::map<std::pair<StringRef, StringRef>, char *> + typedef std::map<std::pair<StringRef, StringRef>, std::string> DirAndFilenameToFilepathMapTy; DirAndFilenameToFilepathMapTy DirAndFilenameToFilepathMap; StringRef getFullFilepath(const MDNode *S); @@ -116,14 +116,6 @@ class LLVM_LIBRARY_VISIBILITY WinCodeViewLineTables : public AsmPrinterHandler { public: WinCodeViewLineTables(AsmPrinter *Asm); - ~WinCodeViewLineTables() override { - for (DirAndFilenameToFilepathMapTy::iterator - I = DirAndFilenameToFilepathMap.begin(), - E = DirAndFilenameToFilepathMap.end(); - I != E; ++I) - free(I->second); - } - void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {} /// \brief Emit the COFF section that holds the line table information. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index a2b9316aa875..48b7104f24c3 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -30,6 +30,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCWin64EH.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" @@ -37,6 +38,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; WinException::WinException(AsmPrinter *A) : EHStreamer(A) { @@ -62,9 +64,9 @@ void WinException::beginFunction(const MachineFunction *MF) { // If any landing pads survive, we need an EH table. 
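The DirAndFilenameToFilepathMap above now maps to std::string instead of strdup'd char* buffers, which is why the destructor that freed every entry can be dropped. A rough standalone sketch of that caching shape, with the real path canonicalization replaced by a placeholder concatenation (everything below is illustrative, not the actual getFullFilepath):

#include <map>
#include <string>
#include <utility>

// Keyed by (directory, filename); the mapped std::string owns its memory,
// so no explicit cleanup pass is needed when the map is destroyed.
using PathCache = std::map<std::pair<std::string, std::string>, std::string>;

const std::string &getFullPath(PathCache &Cache, const std::string &Dir,
                               const std::string &File) {
  std::string &Path = Cache[{Dir, File}];
  if (Path.empty())              // not cached yet: build and remember it
    Path = Dir + "\\" + File;    // placeholder for real canonicalization
  return Path;
}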
bool hasLandingPads = !MMI->getLandingPads().empty(); + bool hasEHFunclets = MMI->hasEHFunclets(); const Function *F = MF->getFunction(); - const Function *ParentF = MMI->getWinEHParent(F); shouldEmitMoves = Asm->needsSEHMoves(); @@ -78,49 +80,23 @@ void WinException::beginFunction(const MachineFunction *MF) { F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && F->needsUnwindTableEntry(); - shouldEmitPersonality = forceEmitPersonality || (hasLandingPads && - PerEncoding != dwarf::DW_EH_PE_omit && Per); + shouldEmitPersonality = + forceEmitPersonality || ((hasLandingPads || hasEHFunclets) && + PerEncoding != dwarf::DW_EH_PE_omit && Per); unsigned LSDAEncoding = TLOF.getLSDAEncoding(); shouldEmitLSDA = shouldEmitPersonality && LSDAEncoding != dwarf::DW_EH_PE_omit; - // If we're not using CFI, we don't want the CFI or the personality. If - // WinEHPrepare outlined something, we should emit the LSDA. + // If we're not using CFI, we don't want the CFI or the personality, but we + // might want EH tables if we had EH pads. if (!Asm->MAI->usesWindowsCFI()) { - bool HasOutlinedChildren = - F->hasFnAttribute("wineh-parent") && F == ParentF; - shouldEmitLSDA = HasOutlinedChildren; + shouldEmitLSDA = hasEHFunclets; shouldEmitPersonality = false; return; } - // If this was an outlined handler, we need to define the label corresponding - // to the offset of the parent frame relative to the stack pointer after the - // prologue. - if (F != ParentF) { - WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF); - auto I = FuncInfo.CatchHandlerParentFrameObjOffset.find(F); - if (I != FuncInfo.CatchHandlerParentFrameObjOffset.end()) { - MCSymbol *HandlerTypeParentFrameOffset = - Asm->OutContext.getOrCreateParentFrameOffsetSymbol( - GlobalValue::getRealLinkageName(F->getName())); - - // Emit a symbol assignment. - Asm->OutStreamer->EmitAssignment( - HandlerTypeParentFrameOffset, - MCConstantExpr::create(I->second, Asm->OutContext)); - } - } - - if (shouldEmitMoves || shouldEmitPersonality) - Asm->OutStreamer->EmitWinCFIStartProc(Asm->CurrentFnSym); - - if (shouldEmitPersonality) { - const MCSymbol *PersHandlerSym = - TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); - Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true); - } + beginFunclet(MF->front(), Asm->CurrentFnSym); } /// endFunction - Gather and emit post-function exception information. @@ -134,43 +110,158 @@ void WinException::endFunction(const MachineFunction *MF) { if (F->hasPersonalityFn()) Per = classifyEHPersonality(F->getPersonalityFn()); - // Get rid of any dead landing pads if we're not using a Windows EH scheme. In - // Windows EH schemes, the landing pad is not actually reachable. It only - // exists so that we can emit the right table data. - if (!isMSVCEHPersonality(Per)) + // Get rid of any dead landing pads if we're not using funclets. In funclet + // schemes, the landing pad is not actually reachable. It only exists so + // that we can emit the right table data. + if (!isFuncletEHPersonality(Per)) MMI->TidyLandingPads(); + endFunclet(); + + // endFunclet will emit the necessary .xdata tables for x64 SEH. + if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets()) + return; + if (shouldEmitPersonality || shouldEmitLSDA) { Asm->OutStreamer->PushSection(); - if (shouldEmitMoves || shouldEmitPersonality) { - // Emit an UNWIND_INFO struct describing the prologue. - Asm->OutStreamer->EmitWinEHHandlerData(); - } else { - // Just switch sections to the right xdata section. 
This use of - // CurrentFnSym assumes that we only emit the LSDA when ending the parent - // function. - MCSection *XData = WinEH::UnwindEmitter::getXDataSection( - Asm->CurrentFnSym, Asm->OutContext); - Asm->OutStreamer->SwitchSection(XData); - } + // Just switch sections to the right xdata section. This use of CurrentFnSym + // assumes that we only emit the LSDA when ending the parent function. + MCSection *XData = WinEH::UnwindEmitter::getXDataSection(Asm->CurrentFnSym, + Asm->OutContext); + Asm->OutStreamer->SwitchSection(XData); // Emit the tables appropriate to the personality function in use. If we // don't recognize the personality, assume it uses an Itanium-style LSDA. if (Per == EHPersonality::MSVC_Win64SEH) - emitCSpecificHandlerTable(); + emitCSpecificHandlerTable(MF); else if (Per == EHPersonality::MSVC_X86SEH) emitExceptHandlerTable(MF); else if (Per == EHPersonality::MSVC_CXX) emitCXXFrameHandler3Table(MF); + else if (Per == EHPersonality::CoreCLR) + emitCLRExceptionTable(MF); else emitExceptionTable(); Asm->OutStreamer->PopSection(); } +} + +/// Retreive the MCSymbol for a GlobalValue or MachineBasicBlock. +static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm, + const MachineBasicBlock *MBB) { + if (!MBB) + return nullptr; + assert(MBB->isEHFuncletEntry()); + + // Give catches and cleanups a name based off of their parent function and + // their funclet entry block's number. + const MachineFunction *MF = MBB->getParent(); + const Function *F = MF->getFunction(); + StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + MCContext &Ctx = MF->getContext(); + StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch"; + return Ctx.getOrCreateSymbol("?" + HandlerPrefix + "$" + + Twine(MBB->getNumber()) + "@?0?" + + FuncLinkageName + "@4HA"); +} + +void WinException::beginFunclet(const MachineBasicBlock &MBB, + MCSymbol *Sym) { + CurrentFuncletEntry = &MBB; + + const Function *F = Asm->MF->getFunction(); + // If a symbol was not provided for the funclet, invent one. + if (!Sym) { + Sym = getMCSymbolForMBB(Asm, &MBB); + + // Describe our funclet symbol as a function with internal linkage. + Asm->OutStreamer->BeginCOFFSymbolDef(Sym); + Asm->OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC); + Asm->OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); + Asm->OutStreamer->EndCOFFSymbolDef(); + + // We want our funclet's entry point to be aligned such that no nops will be + // present after the label. + Asm->EmitAlignment(std::max(Asm->MF->getAlignment(), MBB.getAlignment()), + F); + + // Now that we've emitted the alignment directive, point at our funclet. + Asm->OutStreamer->EmitLabel(Sym); + } + + // Mark 'Sym' as starting our funclet. if (shouldEmitMoves || shouldEmitPersonality) + Asm->OutStreamer->EmitWinCFIStartProc(Sym); + + if (shouldEmitPersonality) { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + const Function *PerFn = nullptr; + + // Determine which personality routine we are using for this funclet. + if (F->hasPersonalityFn()) + PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + const MCSymbol *PersHandlerSym = + TLOF.getCFIPersonalitySymbol(PerFn, *Asm->Mang, Asm->TM, MMI); + + // Classify the personality routine so that we may reason about it. 
+ EHPersonality Per = EHPersonality::Unknown; + if (F->hasPersonalityFn()) + Per = classifyEHPersonality(F->getPersonalityFn()); + + // Do not emit a .seh_handler directive if it is a C++ cleanup funclet. + if (Per != EHPersonality::MSVC_CXX || + !CurrentFuncletEntry->isCleanupFuncletEntry()) + Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true); + } +} + +void WinException::endFunclet() { + // No funclet to process? Great, we have nothing to do. + if (!CurrentFuncletEntry) + return; + + if (shouldEmitMoves || shouldEmitPersonality) { + const Function *F = Asm->MF->getFunction(); + EHPersonality Per = EHPersonality::Unknown; + if (F->hasPersonalityFn()) + Per = classifyEHPersonality(F->getPersonalityFn()); + + // The .seh_handlerdata directive implicitly switches section, push the + // current section so that we may return to it. + Asm->OutStreamer->PushSection(); + + // Emit an UNWIND_INFO struct describing the prologue. + Asm->OutStreamer->EmitWinEHHandlerData(); + + if (Per == EHPersonality::MSVC_CXX && shouldEmitPersonality && + !CurrentFuncletEntry->isCleanupFuncletEntry()) { + // If this is a C++ catch funclet (or the parent function), + // emit a reference to the LSDA for the parent function. + StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol( + Twine("$cppxdata$", FuncLinkageName)); + Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4); + } else if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets() && + !CurrentFuncletEntry->isEHFuncletEntry()) { + // If this is the parent function in Win64 SEH, emit the LSDA immediately + // following .seh_handlerdata. + emitCSpecificHandlerTable(Asm->MF); + } + + // Switch back to the previous section now that we are done writing to + // .xdata. + Asm->OutStreamer->PopSection(); + + // Emit a .seh_endproc directive to mark the end of the function. Asm->OutStreamer->EmitWinCFIEndProc(); + } + + // Let's make sure we don't try to end the same funclet twice. + CurrentFuncletEntry = nullptr; } const MCExpr *WinException::create32bitRef(const MCSymbol *Value) { @@ -188,6 +279,202 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) { return create32bitRef(Asm->getSymbol(GV)); } +const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) { + return MCBinaryExpr::createAdd(create32bitRef(Label), + MCConstantExpr::create(1, Asm->OutContext), + Asm->OutContext); +} + +const MCExpr *WinException::getOffset(const MCSymbol *OffsetOf, + const MCSymbol *OffsetFrom) { + return MCBinaryExpr::createSub( + MCSymbolRefExpr::create(OffsetOf, Asm->OutContext), + MCSymbolRefExpr::create(OffsetFrom, Asm->OutContext), Asm->OutContext); +} + +const MCExpr *WinException::getOffsetPlusOne(const MCSymbol *OffsetOf, + const MCSymbol *OffsetFrom) { + return MCBinaryExpr::createAdd(getOffset(OffsetOf, OffsetFrom), + MCConstantExpr::create(1, Asm->OutContext), + Asm->OutContext); +} + +int WinException::getFrameIndexOffset(int FrameIndex, + const WinEHFuncInfo &FuncInfo) { + const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering(); + unsigned UnusedReg; + if (Asm->MAI->usesWindowsCFI()) + return TFI.getFrameIndexReferenceFromSP(*Asm->MF, FrameIndex, UnusedReg); + // For 32-bit, offsets should be relative to the end of the EH registration + // node. For 64-bit, it's relative to SP at the end of the prologue. 
+ assert(FuncInfo.EHRegNodeEndOffset != INT_MAX); + int Offset = TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg); + Offset += FuncInfo.EHRegNodeEndOffset; + return Offset; +} + +namespace { + +/// Top-level state used to represent unwind to caller +const int NullState = -1; + +struct InvokeStateChange { + /// EH Label immediately after the last invoke in the previous state, or + /// nullptr if the previous state was the null state. + const MCSymbol *PreviousEndLabel; + + /// EH label immediately before the first invoke in the new state, or nullptr + /// if the new state is the null state. + const MCSymbol *NewStartLabel; + + /// State of the invoke following NewStartLabel, or NullState to indicate + /// the presence of calls which may unwind to caller. + int NewState; +}; + +/// Iterator that reports all the invoke state changes in a range of machine +/// basic blocks. Changes to the null state are reported whenever a call that +/// may unwind to caller is encountered. The MBB range is expected to be an +/// entire function or funclet, and the start and end of the range are treated +/// as being in the NullState even if there's not an unwind-to-caller call +/// before the first invoke or after the last one (i.e., the first state change +/// reported is the first change to something other than NullState, and a +/// change back to NullState is always reported at the end of iteration). +class InvokeStateChangeIterator { + InvokeStateChangeIterator(const WinEHFuncInfo &EHInfo, + MachineFunction::const_iterator MFI, + MachineFunction::const_iterator MFE, + MachineBasicBlock::const_iterator MBBI, + int BaseState) + : EHInfo(EHInfo), MFI(MFI), MFE(MFE), MBBI(MBBI), BaseState(BaseState) { + LastStateChange.PreviousEndLabel = nullptr; + LastStateChange.NewStartLabel = nullptr; + LastStateChange.NewState = BaseState; + scan(); + } + +public: + static iterator_range<InvokeStateChangeIterator> + range(const WinEHFuncInfo &EHInfo, MachineFunction::const_iterator Begin, + MachineFunction::const_iterator End, int BaseState = NullState) { + // Reject empty ranges to simplify bookkeeping by ensuring that we can get + // the end of the last block. + assert(Begin != End); + auto BlockBegin = Begin->begin(); + auto BlockEnd = std::prev(End)->end(); + return make_range( + InvokeStateChangeIterator(EHInfo, Begin, End, BlockBegin, BaseState), + InvokeStateChangeIterator(EHInfo, End, End, BlockEnd, BaseState)); + } + + // Iterator methods. + bool operator==(const InvokeStateChangeIterator &O) const { + assert(BaseState == O.BaseState); + // Must be visiting same block. + if (MFI != O.MFI) + return false; + // Must be visiting same isntr. + if (MBBI != O.MBBI) + return false; + // At end of block/instr iteration, we can still have two distinct states: + // one to report the final EndLabel, and another indicating the end of the + // state change iteration. Check for CurrentEndLabel equality to + // distinguish these. 
+ return CurrentEndLabel == O.CurrentEndLabel; + } + + bool operator!=(const InvokeStateChangeIterator &O) const { + return !operator==(O); + } + InvokeStateChange &operator*() { return LastStateChange; } + InvokeStateChange *operator->() { return &LastStateChange; } + InvokeStateChangeIterator &operator++() { return scan(); } + +private: + InvokeStateChangeIterator &scan(); + + const WinEHFuncInfo &EHInfo; + const MCSymbol *CurrentEndLabel = nullptr; + MachineFunction::const_iterator MFI; + MachineFunction::const_iterator MFE; + MachineBasicBlock::const_iterator MBBI; + InvokeStateChange LastStateChange; + bool VisitingInvoke = false; + int BaseState; +}; + +} // end anonymous namespace + +InvokeStateChangeIterator &InvokeStateChangeIterator::scan() { + bool IsNewBlock = false; + for (; MFI != MFE; ++MFI, IsNewBlock = true) { + if (IsNewBlock) + MBBI = MFI->begin(); + for (auto MBBE = MFI->end(); MBBI != MBBE; ++MBBI) { + const MachineInstr &MI = *MBBI; + if (!VisitingInvoke && LastStateChange.NewState != BaseState && + MI.isCall() && !EHStreamer::callToNoUnwindFunction(&MI)) { + // Indicate a change of state to the null state. We don't have + // start/end EH labels handy but the caller won't expect them for + // null state regions. + LastStateChange.PreviousEndLabel = CurrentEndLabel; + LastStateChange.NewStartLabel = nullptr; + LastStateChange.NewState = BaseState; + CurrentEndLabel = nullptr; + // Don't re-visit this instr on the next scan + ++MBBI; + return *this; + } + + // All other state changes are at EH labels before/after invokes. + if (!MI.isEHLabel()) + continue; + MCSymbol *Label = MI.getOperand(0).getMCSymbol(); + if (Label == CurrentEndLabel) { + VisitingInvoke = false; + continue; + } + auto InvokeMapIter = EHInfo.LabelToStateMap.find(Label); + // Ignore EH labels that aren't the ones inserted before an invoke + if (InvokeMapIter == EHInfo.LabelToStateMap.end()) + continue; + auto &StateAndEnd = InvokeMapIter->second; + int NewState = StateAndEnd.first; + // Keep track of the fact that we're between EH start/end labels so + // we know not to treat the inoke we'll see as unwinding to caller. + VisitingInvoke = true; + if (NewState == LastStateChange.NewState) { + // The state isn't actually changing here. Record the new end and + // keep going. + CurrentEndLabel = StateAndEnd.second; + continue; + } + // Found a state change to report + LastStateChange.PreviousEndLabel = CurrentEndLabel; + LastStateChange.NewStartLabel = Label; + LastStateChange.NewState = NewState; + // Start keeping track of the new current end + CurrentEndLabel = StateAndEnd.second; + // Don't re-visit this instr on the next scan + ++MBBI; + return *this; + } + } + // Iteration hit the end of the block range. + if (LastStateChange.NewState != BaseState) { + // Report the end of the last new state + LastStateChange.PreviousEndLabel = CurrentEndLabel; + LastStateChange.NewStartLabel = nullptr; + LastStateChange.NewState = BaseState; + // Leave CurrentEndLabel non-null to distinguish this state from end. + assert(CurrentEndLabel != nullptr); + return *this; + } + // We've reported all state changes and hit the end state. + CurrentEndLabel = nullptr; + return *this; +} + /// Emit the language-specific data that __C_specific_handler expects. This /// handler lives in the x64 Microsoft C runtime and allows catching or cleaning /// up after faults with __try, __except, and __finally. 
The typeinfo values @@ -216,135 +503,156 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) { /// imagerel32 LabelLPad; // Zero means __finally. /// } Entries[NumEntries]; /// }; -void WinException::emitCSpecificHandlerTable() { - const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); - - // Simplifying assumptions for first implementation: - // - Cleanups are not implemented. - // - Filters are not implemented. - - // The Itanium LSDA table sorts similar landing pads together to simplify the - // actions table, but we don't need that. - SmallVector<const LandingPadInfo *, 64> LandingPads; - LandingPads.reserve(PadInfos.size()); - for (const auto &LP : PadInfos) - LandingPads.push_back(&LP); - - // Compute label ranges for call sites as we would for the Itanium LSDA, but - // use an all zero action table because we aren't using these actions. - SmallVector<unsigned, 64> FirstActions; - FirstActions.resize(LandingPads.size()); - SmallVector<CallSiteEntry, 64> CallSites; - computeCallSiteTable(CallSites, LandingPads, FirstActions); - - MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin(); - MCSymbol *EHFuncEndSym = Asm->getFunctionEnd(); - - // Emit the number of table entries. - unsigned NumEntries = 0; - for (const CallSiteEntry &CSE : CallSites) { - if (!CSE.LPad) - continue; // Ignore gaps. - NumEntries += CSE.LPad->SEHHandlers.size(); +void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) { + auto &OS = *Asm->OutStreamer; + MCContext &Ctx = Asm->OutContext; + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); + + bool VerboseAsm = OS.isVerboseAsm(); + auto AddComment = [&](const Twine &Comment) { + if (VerboseAsm) + OS.AddComment(Comment); + }; + + // Emit a label assignment with the SEH frame offset so we can use it for + // llvm.x86.seh.recoverfp. + StringRef FLinkageName = + GlobalValue::getRealLinkageName(MF->getFunction()->getName()); + MCSymbol *ParentFrameOffset = + Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); + const MCExpr *MCOffset = + MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx); + Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); + + // Use the assembler to compute the number of table entries through label + // difference and division. + MCSymbol *TableBegin = + Ctx.createTempSymbol("lsda_begin", /*AlwaysAddSuffix=*/true); + MCSymbol *TableEnd = + Ctx.createTempSymbol("lsda_end", /*AlwaysAddSuffix=*/true); + const MCExpr *LabelDiff = getOffset(TableEnd, TableBegin); + const MCExpr *EntrySize = MCConstantExpr::create(16, Ctx); + const MCExpr *EntryCount = MCBinaryExpr::createDiv(LabelDiff, EntrySize, Ctx); + AddComment("Number of call sites"); + OS.EmitValue(EntryCount, 4); + + OS.EmitLabel(TableBegin); + + // Iterate over all the invoke try ranges. Unlike MSVC, LLVM currently only + // models exceptions from invokes. LLVM also allows arbitrary reordering of + // the code, so our tables end up looking a bit different. Rather than + // trying to match MSVC's tables exactly, we emit a denormalized table. For + // each range of invokes in the same state, we emit table entries for all + // the actions that would be taken in that state. This means our tables are + // slightly bigger, which is OK. + const MCSymbol *LastStartLabel = nullptr; + int LastEHState = -1; + // Break out before we enter into a finally funclet. + // FIXME: We need to emit separate EH tables for cleanups. 
+ MachineFunction::const_iterator End = MF->end(); + MachineFunction::const_iterator Stop = std::next(MF->begin()); + while (Stop != End && !Stop->isEHFuncletEntry()) + ++Stop; + for (const auto &StateChange : + InvokeStateChangeIterator::range(FuncInfo, MF->begin(), Stop)) { + // Emit all the actions for the state we just transitioned out of + // if it was not the null state + if (LastEHState != -1) + emitSEHActionsForRange(FuncInfo, LastStartLabel, + StateChange.PreviousEndLabel, LastEHState); + LastStartLabel = StateChange.NewStartLabel; + LastEHState = StateChange.NewState; } - Asm->OutStreamer->EmitIntValue(NumEntries, 4); - // If there are no actions, we don't need to iterate again. - if (NumEntries == 0) - return; + OS.EmitLabel(TableEnd); +} - // Emit the four-label records for each call site entry. The table has to be - // sorted in layout order, and the call sites should already be sorted. - for (const CallSiteEntry &CSE : CallSites) { - // Ignore gaps. Unlike the Itanium model, unwinding through a frame without - // an EH table entry will propagate the exception rather than terminating - // the program. - if (!CSE.LPad) - continue; - const LandingPadInfo *LPad = CSE.LPad; - - // Compute the label range. We may reuse the function begin and end labels - // rather than forming new ones. - const MCExpr *Begin = - create32bitRef(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym); - const MCExpr *End; - if (CSE.EndLabel) { - // The interval is half-open, so we have to add one to include the return - // address of the last invoke in the range. - End = MCBinaryExpr::createAdd(create32bitRef(CSE.EndLabel), - MCConstantExpr::create(1, Asm->OutContext), - Asm->OutContext); +void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, + const MCSymbol *BeginLabel, + const MCSymbol *EndLabel, int State) { + auto &OS = *Asm->OutStreamer; + MCContext &Ctx = Asm->OutContext; + + bool VerboseAsm = OS.isVerboseAsm(); + auto AddComment = [&](const Twine &Comment) { + if (VerboseAsm) + OS.AddComment(Comment); + }; + + assert(BeginLabel && EndLabel); + while (State != -1) { + const SEHUnwindMapEntry &UME = FuncInfo.SEHUnwindMap[State]; + const MCExpr *FilterOrFinally; + const MCExpr *ExceptOrNull; + auto *Handler = UME.Handler.get<MachineBasicBlock *>(); + if (UME.IsFinally) { + FilterOrFinally = create32bitRef(getMCSymbolForMBB(Asm, Handler)); + ExceptOrNull = MCConstantExpr::create(0, Ctx); } else { - End = create32bitRef(EHFuncEndSym); + // For an except, the filter can be 1 (catch-all) or a function + // label. + FilterOrFinally = UME.Filter ? create32bitRef(UME.Filter) + : MCConstantExpr::create(1, Ctx); + ExceptOrNull = create32bitRef(Handler->getSymbol()); } - // Emit an entry for each action. - for (SEHHandler Handler : LPad->SEHHandlers) { - Asm->OutStreamer->EmitValue(Begin, 4); - Asm->OutStreamer->EmitValue(End, 4); - - // Emit the filter or finally function pointer, if present. Otherwise, - // emit '1' to indicate a catch-all. - const Function *F = Handler.FilterOrFinally; - if (F) - Asm->OutStreamer->EmitValue(create32bitRef(Asm->getSymbol(F)), 4); - else - Asm->OutStreamer->EmitIntValue(1, 4); - - // Emit the recovery address, if present. Otherwise, this must be a - // finally. 
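emitSEHActionsForRange above walks SEHUnwindMap by following ToState until it reaches -1 (unwind to caller), emitting one action record per enclosing state. The same chain walk in isolation, with a simplified entry type standing in for SEHUnwindMapEntry and printf standing in for the streamer (a sketch, not the emitter itself):

#include <cassert>
#include <cstdio>
#include <vector>

struct UnwindEntry {       // simplified stand-in for SEHUnwindMapEntry
  int ToState;             // state to transition to after this action
  const char *Handler;     // name of the filter/finally, for illustration
};

void emitActions(const std::vector<UnwindEntry> &Map, int State) {
  while (State != -1) {
    const UnwindEntry &E = Map[State];
    std::printf("action: %s\n", E.Handler);
    assert(E.ToState < State && "states should decrease");
    State = E.ToState;     // hop to the enclosing state
  }
}

int main() {
  // State 2 is nested inside state 0; state 1 unwinds straight to caller.
  std::vector<UnwindEntry> Map = {{-1, "outer"}, {-1, "lone"}, {0, "inner"}};
  emitActions(Map, 2);     // prints "inner" then "outer"
}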
- const BlockAddress *BA = Handler.RecoverBA; - if (BA) - Asm->OutStreamer->EmitValue( - create32bitRef(Asm->GetBlockAddressSymbol(BA)), 4); - else - Asm->OutStreamer->EmitIntValue(0, 4); - } + AddComment("LabelStart"); + OS.EmitValue(getLabelPlusOne(BeginLabel), 4); + AddComment("LabelEnd"); + OS.EmitValue(getLabelPlusOne(EndLabel), 4); + AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction" + : "CatchAll"); + OS.EmitValue(FilterOrFinally, 4); + AddComment(UME.IsFinally ? "Null" : "ExceptionHandler"); + OS.EmitValue(ExceptOrNull, 4); + + assert(UME.ToState < State && "states should decrease"); + State = UME.ToState; } } void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { const Function *F = MF->getFunction(); - const Function *ParentF = MMI->getWinEHParent(F); auto &OS = *Asm->OutStreamer; - WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF); + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); - StringRef ParentLinkageName = - GlobalValue::getRealLinkageName(ParentF->getName()); + StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + SmallVector<std::pair<const MCExpr *, int>, 4> IPToStateTable; MCSymbol *FuncInfoXData = nullptr; if (shouldEmitPersonality) { - FuncInfoXData = Asm->OutContext.getOrCreateSymbol( - Twine("$cppxdata$", ParentLinkageName)); - OS.EmitValue(create32bitRef(FuncInfoXData), 4); - - extendIP2StateTable(MF, ParentF, FuncInfo); - - // Defer emission until we've visited the parent function and all the catch - // handlers. Cleanups don't contribute to the ip2state table, so don't count - // them. - if (ParentF != F && !FuncInfo.CatchHandlerMaxState.count(F)) - return; - ++FuncInfo.NumIPToStateFuncsVisited; - if (FuncInfo.NumIPToStateFuncsVisited != FuncInfo.CatchHandlerMaxState.size()) - return; + // If we're 64-bit, emit a pointer to the C++ EH data, and build a map from + // IPs to state numbers. 
+ FuncInfoXData = + Asm->OutContext.getOrCreateSymbol(Twine("$cppxdata$", FuncLinkageName)); + computeIP2StateTable(MF, FuncInfo, IPToStateTable); } else { - FuncInfoXData = Asm->OutContext.getOrCreateLSDASymbol(ParentLinkageName); - emitEHRegistrationOffsetLabel(FuncInfo, ParentLinkageName); + FuncInfoXData = Asm->OutContext.getOrCreateLSDASymbol(FuncLinkageName); } + int UnwindHelpOffset = 0; + if (Asm->MAI->usesWindowsCFI()) + UnwindHelpOffset = + getFrameIndexOffset(FuncInfo.UnwindHelpFrameIdx, FuncInfo); + MCSymbol *UnwindMapXData = nullptr; MCSymbol *TryBlockMapXData = nullptr; MCSymbol *IPToStateXData = nullptr; - if (!FuncInfo.UnwindMap.empty()) + if (!FuncInfo.CxxUnwindMap.empty()) UnwindMapXData = Asm->OutContext.getOrCreateSymbol( - Twine("$stateUnwindMap$", ParentLinkageName)); + Twine("$stateUnwindMap$", FuncLinkageName)); if (!FuncInfo.TryBlockMap.empty()) - TryBlockMapXData = Asm->OutContext.getOrCreateSymbol( - Twine("$tryMap$", ParentLinkageName)); - if (!FuncInfo.IPToStateList.empty()) - IPToStateXData = Asm->OutContext.getOrCreateSymbol( - Twine("$ip2state$", ParentLinkageName)); + TryBlockMapXData = + Asm->OutContext.getOrCreateSymbol(Twine("$tryMap$", FuncLinkageName)); + if (!IPToStateTable.empty()) + IPToStateXData = + Asm->OutContext.getOrCreateSymbol(Twine("$ip2state$", FuncLinkageName)); + + bool VerboseAsm = OS.isVerboseAsm(); + auto AddComment = [&](const Twine &Comment) { + if (VerboseAsm) + OS.AddComment(Comment); + }; // FuncInfo { // uint32_t MagicNumber @@ -363,17 +671,38 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // EHFlags & 4 -> The function is noexcept(true), unwinding can't continue. OS.EmitValueToAlignment(4); OS.EmitLabel(FuncInfoXData); - OS.EmitIntValue(0x19930522, 4); // MagicNumber - OS.EmitIntValue(FuncInfo.UnwindMap.size(), 4); // MaxState - OS.EmitValue(create32bitRef(UnwindMapXData), 4); // UnwindMap - OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); // NumTryBlocks - OS.EmitValue(create32bitRef(TryBlockMapXData), 4); // TryBlockMap - OS.EmitIntValue(FuncInfo.IPToStateList.size(), 4); // IPMapEntries - OS.EmitValue(create32bitRef(IPToStateXData), 4); // IPToStateMap - if (Asm->MAI->usesWindowsCFI()) - OS.EmitIntValue(FuncInfo.UnwindHelpFrameOffset, 4); // UnwindHelp - OS.EmitIntValue(0, 4); // ESTypeList - OS.EmitIntValue(1, 4); // EHFlags + + AddComment("MagicNumber"); + OS.EmitIntValue(0x19930522, 4); + + AddComment("MaxState"); + OS.EmitIntValue(FuncInfo.CxxUnwindMap.size(), 4); + + AddComment("UnwindMap"); + OS.EmitValue(create32bitRef(UnwindMapXData), 4); + + AddComment("NumTryBlocks"); + OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); + + AddComment("TryBlockMap"); + OS.EmitValue(create32bitRef(TryBlockMapXData), 4); + + AddComment("IPMapEntries"); + OS.EmitIntValue(IPToStateTable.size(), 4); + + AddComment("IPToStateXData"); + OS.EmitValue(create32bitRef(IPToStateXData), 4); + + if (Asm->MAI->usesWindowsCFI()) { + AddComment("UnwindHelp"); + OS.EmitIntValue(UnwindHelpOffset, 4); + } + + AddComment("ESTypeList"); + OS.EmitIntValue(0, 4); + + AddComment("EHFlags"); + OS.EmitIntValue(1, 4); // UnwindMapEntry { // int32_t ToState; @@ -381,9 +710,14 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // }; if (UnwindMapXData) { OS.EmitLabel(UnwindMapXData); - for (const WinEHUnwindMapEntry &UME : FuncInfo.UnwindMap) { - OS.EmitIntValue(UME.ToState, 4); // ToState - OS.EmitValue(create32bitRef(UME.Cleanup), 4); // Action + for (const CxxUnwindMapEntry &UME : FuncInfo.CxxUnwindMap) { 
+ MCSymbol *CleanupSym = + getMCSymbolForMBB(Asm, UME.Cleanup.dyn_cast<MachineBasicBlock *>()); + AddComment("ToState"); + OS.EmitIntValue(UME.ToState, 4); + + AddComment("Action"); + OS.EmitValue(create32bitRef(CleanupSym), 4); } } @@ -398,33 +732,49 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { OS.EmitLabel(TryBlockMapXData); SmallVector<MCSymbol *, 1> HandlerMaps; for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { - WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; - MCSymbol *HandlerMapXData = nullptr; + const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + MCSymbol *HandlerMapXData = nullptr; if (!TBME.HandlerArray.empty()) HandlerMapXData = Asm->OutContext.getOrCreateSymbol(Twine("$handlerMap$") .concat(Twine(I)) .concat("$") - .concat(ParentLinkageName)); - + .concat(FuncLinkageName)); HandlerMaps.push_back(HandlerMapXData); - int CatchHigh = -1; - for (WinEHHandlerType &HT : TBME.HandlerArray) - CatchHigh = - std::max(CatchHigh, FuncInfo.CatchHandlerMaxState[HT.Handler]); - - assert(TBME.TryLow <= TBME.TryHigh); - OS.EmitIntValue(TBME.TryLow, 4); // TryLow - OS.EmitIntValue(TBME.TryHigh, 4); // TryHigh - OS.EmitIntValue(CatchHigh, 4); // CatchHigh - OS.EmitIntValue(TBME.HandlerArray.size(), 4); // NumCatches - OS.EmitValue(create32bitRef(HandlerMapXData), 4); // HandlerArray + // TBMEs should form intervals. + assert(0 <= TBME.TryLow && "bad trymap interval"); + assert(TBME.TryLow <= TBME.TryHigh && "bad trymap interval"); + assert(TBME.TryHigh < TBME.CatchHigh && "bad trymap interval"); + assert(TBME.CatchHigh < int(FuncInfo.CxxUnwindMap.size()) && + "bad trymap interval"); + + AddComment("TryLow"); + OS.EmitIntValue(TBME.TryLow, 4); + + AddComment("TryHigh"); + OS.EmitIntValue(TBME.TryHigh, 4); + + AddComment("CatchHigh"); + OS.EmitIntValue(TBME.CatchHigh, 4); + + AddComment("NumCatches"); + OS.EmitIntValue(TBME.HandlerArray.size(), 4); + + AddComment("HandlerArray"); + OS.EmitValue(create32bitRef(HandlerMapXData), 4); + } + + // All funclets use the same parent frame offset currently. + unsigned ParentFrameOffset = 0; + if (shouldEmitPersonality) { + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + ParentFrameOffset = TFI->getWinEHParentFrameOffset(*MF); } for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { - WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; MCSymbol *HandlerMapXData = HandlerMaps[I]; if (!HandlerMapXData) continue; @@ -438,32 +788,34 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { OS.EmitLabel(HandlerMapXData); for (const WinEHHandlerType &HT : TBME.HandlerArray) { // Get the frame escape label with the offset of the catch object. If - // the index is -1, then there is no catch object, and we should emit an - // offset of zero, indicating that no copy will occur. + // the index is INT_MAX, then there is no catch object, and we should + // emit an offset of zero, indicating that no copy will occur. 
const MCExpr *FrameAllocOffsetRef = nullptr; - if (HT.CatchObjRecoverIdx >= 0) { - MCSymbol *FrameAllocOffset = - Asm->OutContext.getOrCreateFrameAllocSymbol( - GlobalValue::getRealLinkageName(ParentF->getName()), - HT.CatchObjRecoverIdx); - FrameAllocOffsetRef = MCSymbolRefExpr::create( - FrameAllocOffset, MCSymbolRefExpr::VK_None, Asm->OutContext); + if (HT.CatchObj.FrameIndex != INT_MAX) { + int Offset = getFrameIndexOffset(HT.CatchObj.FrameIndex, FuncInfo); + FrameAllocOffsetRef = MCConstantExpr::create(Offset, Asm->OutContext); } else { FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext); } - OS.EmitIntValue(HT.Adjectives, 4); // Adjectives - OS.EmitValue(create32bitRef(HT.TypeDescriptor), 4); // Type - OS.EmitValue(FrameAllocOffsetRef, 4); // CatchObjOffset - OS.EmitValue(create32bitRef(HT.Handler), 4); // Handler + MCSymbol *HandlerSym = + getMCSymbolForMBB(Asm, HT.Handler.dyn_cast<MachineBasicBlock *>()); + + AddComment("Adjectives"); + OS.EmitIntValue(HT.Adjectives, 4); + + AddComment("Type"); + OS.EmitValue(create32bitRef(HT.TypeDescriptor), 4); + + AddComment("CatchObjOffset"); + OS.EmitValue(FrameAllocOffsetRef, 4); + + AddComment("Handler"); + OS.EmitValue(create32bitRef(HandlerSym), 4); if (shouldEmitPersonality) { - MCSymbol *ParentFrameOffset = - Asm->OutContext.getOrCreateParentFrameOffsetSymbol( - GlobalValue::getRealLinkageName(HT.Handler->getName())); - const MCSymbolRefExpr *ParentFrameOffsetRef = MCSymbolRefExpr::create( - ParentFrameOffset, Asm->OutContext); - OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset + AddComment("ParentFrameOffset"); + OS.EmitIntValue(ParentFrameOffset, 4); } } } @@ -475,87 +827,65 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // }; if (IPToStateXData) { OS.EmitLabel(IPToStateXData); - for (auto &IPStatePair : FuncInfo.IPToStateList) { - OS.EmitValue(create32bitRef(IPStatePair.first), 4); // IP - OS.EmitIntValue(IPStatePair.second, 4); // State + for (auto &IPStatePair : IPToStateTable) { + AddComment("IP"); + OS.EmitValue(IPStatePair.first, 4); + AddComment("ToState"); + OS.EmitIntValue(IPStatePair.second, 4); } } } -void WinException::extendIP2StateTable(const MachineFunction *MF, - const Function *ParentF, - WinEHFuncInfo &FuncInfo) { - const Function *F = MF->getFunction(); - - // The Itanium LSDA table sorts similar landing pads together to simplify the - // actions table, but we don't need that. - SmallVector<const LandingPadInfo *, 64> LandingPads; - const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); - LandingPads.reserve(PadInfos.size()); - for (const auto &LP : PadInfos) - LandingPads.push_back(&LP); - - RangeMapType PadMap; - computePadMap(LandingPads, PadMap); - - // The end label of the previous invoke or nounwind try-range. - MCSymbol *LastLabel = Asm->getFunctionBegin(); - - // Whether there is a potentially throwing instruction (currently this means - // an ordinary call) between the end of the previous try-range and now. - bool SawPotentiallyThrowing = false; - - int LastEHState = -2; - - // The parent function and the catch handlers contribute to the 'ip2state' - // table. - - // Include ip2state entries for the beginning of the main function and - // for catch handler functions. 
- if (F == ParentF) { - FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1)); - LastEHState = -1; - } else if (FuncInfo.HandlerBaseState.count(F)) { - FuncInfo.IPToStateList.push_back( - std::make_pair(LastLabel, FuncInfo.HandlerBaseState[F])); - LastEHState = FuncInfo.HandlerBaseState[F]; - } - for (const auto &MBB : *MF) { - for (const auto &MI : MBB) { - if (!MI.isEHLabel()) { - if (MI.isCall()) - SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI); - continue; +void WinException::computeIP2StateTable( + const MachineFunction *MF, const WinEHFuncInfo &FuncInfo, + SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable) { + + for (MachineFunction::const_iterator FuncletStart = MF->begin(), + FuncletEnd = MF->begin(), + End = MF->end(); + FuncletStart != End; FuncletStart = FuncletEnd) { + // Find the end of the funclet + while (++FuncletEnd != End) { + if (FuncletEnd->isEHFuncletEntry()) { + break; } + } - // End of the previous try-range? - MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol(); - if (BeginLabel == LastLabel) - SawPotentiallyThrowing = false; - - // Beginning of a new try-range? - RangeMapType::const_iterator L = PadMap.find(BeginLabel); - if (L == PadMap.end()) - // Nope, it was just some random label. - continue; - - const PadRange &P = L->second; - const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; - assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && - "Inconsistent landing pad map!"); - - // FIXME: Should this be using FuncInfo.HandlerBaseState? - if (SawPotentiallyThrowing && LastEHState != -1) { - FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1)); - SawPotentiallyThrowing = false; - LastEHState = -1; - } + // Don't emit ip2state entries for cleanup funclets. Any interesting + // exceptional actions in cleanups must be handled in a separate IR + // function. + if (FuncletStart->isCleanupFuncletEntry()) + continue; - if (LandingPad->WinEHState != LastEHState) - FuncInfo.IPToStateList.push_back( - std::make_pair(BeginLabel, LandingPad->WinEHState)); - LastEHState = LandingPad->WinEHState; - LastLabel = LandingPad->EndLabels[P.RangeIndex]; + MCSymbol *StartLabel; + int BaseState; + if (FuncletStart == MF->begin()) { + BaseState = NullState; + StartLabel = Asm->getFunctionBegin(); + } else { + auto *FuncletPad = + cast<FuncletPadInst>(FuncletStart->getBasicBlock()->getFirstNonPHI()); + assert(FuncInfo.FuncletBaseStateMap.count(FuncletPad) != 0); + BaseState = FuncInfo.FuncletBaseStateMap.find(FuncletPad)->second; + StartLabel = getMCSymbolForMBB(Asm, &*FuncletStart); + } + assert(StartLabel && "need local function start label"); + IPToStateTable.push_back( + std::make_pair(create32bitRef(StartLabel), BaseState)); + + for (const auto &StateChange : InvokeStateChangeIterator::range( + FuncInfo, FuncletStart, FuncletEnd, BaseState)) { + // Compute the label to report as the start of this entry; use the EH + // start label for the invoke if we have one, otherwise (this is a call + // which may unwind to our caller and does not have an EH start label, so) + // use the previous end label. + const MCSymbol *ChangeLabel = StateChange.NewStartLabel; + if (!ChangeLabel) + ChangeLabel = StateChange.PreviousEndLabel; + // Emit an entry indicating that PCs after 'Label' have this EH state. + IPToStateTable.push_back( + std::make_pair(getLabelPlusOne(ChangeLabel), StateChange.NewState)); + // FIXME: assert that NewState is between CatchLow and CatchHigh. 
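computeIP2StateTable above appends a row only when the EH state actually changes, so a run of calls that share a state shares a single entry. The run-collapsing idea in a self-contained form, using array indices in place of labels (purely illustrative, not the real table layout):

#include <cstdio>
#include <utility>
#include <vector>

// Collapse a per-"instruction" state sequence into (start index, state) rows,
// emitting a row only when the state differs from the previous one.
std::vector<std::pair<size_t, int>> buildTable(const std::vector<int> &States) {
  std::vector<std::pair<size_t, int>> Table;
  int Last = -2;                      // sentinel: no previous state
  for (size_t I = 0; I < States.size(); ++I) {
    if (States[I] != Last) {
      Table.push_back({I, States[I]});
      Last = States[I];
    }
  }
  return Table;
}

int main() {
  // -1 plays the role of the null state ("unwind to caller").
  std::vector<int> States = {-1, -1, 0, 0, 0, 1, -1};
  for (auto &Row : buildTable(States))
    std::printf("from %zu: state %d\n", Row.first, Row.second);
  // from 0: state -1, from 2: state 0, from 5: state 1, from 6: state -1
}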
} } } @@ -566,15 +896,15 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, // registration in order to recover the parent frame pointer. Now that we know // we've code generated the parent, we can emit the label assignment that // those helpers use to get the offset of the registration node. - assert(FuncInfo.EHRegNodeEscapeIndex != INT_MAX && - "no EH reg node localescape index"); + MCContext &Ctx = Asm->OutContext; MCSymbol *ParentFrameOffset = - Asm->OutContext.getOrCreateParentFrameOffsetSymbol(FLinkageName); - MCSymbol *RegistrationOffsetSym = Asm->OutContext.getOrCreateFrameAllocSymbol( - FLinkageName, FuncInfo.EHRegNodeEscapeIndex); - const MCExpr *RegistrationOffsetSymRef = - MCSymbolRefExpr::create(RegistrationOffsetSym, Asm->OutContext); - Asm->OutStreamer->EmitAssignment(ParentFrameOffset, RegistrationOffsetSymRef); + Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); + unsigned UnusedReg; + const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); + int64_t Offset = TFI->getFrameIndexReference( + *Asm->MF, FuncInfo.EHRegNodeFrameIndex, UnusedReg); + const MCExpr *MCOffset = MCConstantExpr::create(Offset, Ctx); + Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); } /// Emit the language-specific data that _except_handler3 and 4 expect. This is @@ -585,7 +915,13 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { const Function *F = MF->getFunction(); StringRef FLinkageName = GlobalValue::getRealLinkageName(F->getName()); - WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(F); + bool VerboseAsm = OS.isVerboseAsm(); + auto AddComment = [&](const Twine &Comment) { + if (VerboseAsm) + OS.AddComment(Comment); + }; + + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName); // Emit the __ehtable label that we use for llvm.x86.seh.lsda. @@ -611,58 +947,290 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { // // Only the EHCookieOffset field appears to vary, and it appears to be the // offset from the final saved SP value to the retaddr. + AddComment("GSCookieOffset"); OS.EmitIntValue(-2, 4); + AddComment("GSCookieXOROffset"); OS.EmitIntValue(0, 4); // FIXME: Calculate. + AddComment("EHCookieOffset"); OS.EmitIntValue(9999, 4); + AddComment("EHCookieXOROffset"); OS.EmitIntValue(0, 4); BaseState = -2; } - // Build a list of pointers to LandingPadInfos and then sort by WinEHState. - const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); - SmallVector<const LandingPadInfo *, 4> LPads; - LPads.reserve((PadInfos.size())); - for (const LandingPadInfo &LPInfo : PadInfos) - LPads.push_back(&LPInfo); - std::sort(LPads.begin(), LPads.end(), - [](const LandingPadInfo *L, const LandingPadInfo *R) { - return L->WinEHState < R->WinEHState; - }); - - // For each action in each lpad, emit one of these: - // struct ScopeTableEntry { - // int32_t EnclosingLevel; - // int32_t (__cdecl *Filter)(); - // void *HandlerOrFinally; - // }; - // - // The "outermost" action will use BaseState as its enclosing level. Each - // other action will refer to the previous state as its enclosing level. 
- int CurState = 0; - for (const LandingPadInfo *LPInfo : LPads) { - int EnclosingLevel = BaseState; - assert(CurState + int(LPInfo->SEHHandlers.size()) - 1 == - LPInfo->WinEHState && - "gaps in the SEH scope table"); - for (auto I = LPInfo->SEHHandlers.rbegin(), E = LPInfo->SEHHandlers.rend(); - I != E; ++I) { - const SEHHandler &Handler = *I; - const BlockAddress *BA = Handler.RecoverBA; - const Function *F = Handler.FilterOrFinally; - assert(F && "cannot catch all in 32-bit SEH without filter function"); - const MCExpr *FilterOrNull = - create32bitRef(BA ? Asm->getSymbol(F) : nullptr); - const MCExpr *ExceptOrFinally = create32bitRef( - BA ? Asm->GetBlockAddressSymbol(BA) : Asm->getSymbol(F)); - - OS.EmitIntValue(EnclosingLevel, 4); - OS.EmitValue(FilterOrNull, 4); - OS.EmitValue(ExceptOrFinally, 4); - - // The next state unwinds to this state. - EnclosingLevel = CurState; - CurState++; + assert(!FuncInfo.SEHUnwindMap.empty()); + for (const SEHUnwindMapEntry &UME : FuncInfo.SEHUnwindMap) { + auto *Handler = UME.Handler.get<MachineBasicBlock *>(); + const MCSymbol *ExceptOrFinally = + UME.IsFinally ? getMCSymbolForMBB(Asm, Handler) : Handler->getSymbol(); + // -1 is usually the base state for "unwind to caller", but for + // _except_handler4 it's -2. Do that replacement here if necessary. + int ToState = UME.ToState == -1 ? BaseState : UME.ToState; + AddComment("ToState"); + OS.EmitIntValue(ToState, 4); + AddComment(UME.IsFinally ? "Null" : "FilterFunction"); + OS.EmitValue(create32bitRef(UME.Filter), 4); + AddComment(UME.IsFinally ? "FinallyFunclet" : "ExceptionHandler"); + OS.EmitValue(create32bitRef(ExceptOrFinally), 4); + } +} + +static int getRank(const WinEHFuncInfo &FuncInfo, int State) { + int Rank = 0; + while (State != -1) { + ++Rank; + State = FuncInfo.ClrEHUnwindMap[State].Parent; + } + return Rank; +} + +static int getAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) { + int LeftRank = getRank(FuncInfo, Left); + int RightRank = getRank(FuncInfo, Right); + + while (LeftRank < RightRank) { + Right = FuncInfo.ClrEHUnwindMap[Right].Parent; + --RightRank; + } + + while (RightRank < LeftRank) { + Left = FuncInfo.ClrEHUnwindMap[Left].Parent; + --LeftRank; + } + + while (Left != Right) { + Left = FuncInfo.ClrEHUnwindMap[Left].Parent; + Right = FuncInfo.ClrEHUnwindMap[Right].Parent; + } + + return Left; +} + +void WinException::emitCLRExceptionTable(const MachineFunction *MF) { + // CLR EH "states" are really just IDs that identify handlers/funclets; + // states, handlers, and funclets all have 1:1 mappings between them, and a + // handler/funclet's "state" is its index in the ClrEHUnwindMap. + MCStreamer &OS = *Asm->OutStreamer; + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); + MCSymbol *FuncBeginSym = Asm->getFunctionBegin(); + MCSymbol *FuncEndSym = Asm->getFunctionEnd(); + + // A ClrClause describes a protected region. + struct ClrClause { + const MCSymbol *StartLabel; // Start of protected region + const MCSymbol *EndLabel; // End of protected region + int State; // Index of handler protecting the protected region + int EnclosingState; // Index of funclet enclosing the protected region + }; + SmallVector<ClrClause, 8> Clauses; + + // Build a map from handler MBBs to their corresponding states (i.e. their + // indices in the ClrEHUnwindMap). 
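getRank and getAncestor above find the common ancestor of two CLR EH states by equalizing their depths along the Parent chain and then climbing both in lockstep. The same routine over a plain parent-index array, where -1 marks the root the way NullState does (a sketch under that assumption):

#include <cstdio>
#include <vector>

static int rankOf(const std::vector<int> &Parent, int State) {
  int Rank = 0;
  for (; State != -1; State = Parent[State])
    ++Rank;
  return Rank;
}

// Common ancestor in a forest encoded as parent indices (-1 = root).
static int ancestor(const std::vector<int> &Parent, int Left, int Right) {
  int LR = rankOf(Parent, Left), RR = rankOf(Parent, Right);
  while (LR < RR) { Right = Parent[Right]; --RR; }  // lift the deeper node
  while (RR < LR) { Left = Parent[Left]; --LR; }
  while (Left != Right) {                           // now climb together
    Left = Parent[Left];
    Right = Parent[Right];
  }
  return Left;
}

int main() {
  // State 1 and state 3 both descend from state 0; state 2 is 3's parent.
  std::vector<int> Parent = {-1, 0, 0, 2};
  std::printf("%d\n", ancestor(Parent, 1, 3)); // prints 0
  std::printf("%d\n", ancestor(Parent, 3, 3)); // prints 3
}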
+ int NumStates = FuncInfo.ClrEHUnwindMap.size(); + assert(NumStates > 0 && "Don't need exception table!"); + DenseMap<const MachineBasicBlock *, int> HandlerStates; + for (int State = 0; State < NumStates; ++State) { + MachineBasicBlock *HandlerBlock = + FuncInfo.ClrEHUnwindMap[State].Handler.get<MachineBasicBlock *>(); + HandlerStates[HandlerBlock] = State; + // Use this loop through all handlers to verify our assumption (used in + // the MinEnclosingState computation) that ancestors have lower state + // numbers than their descendants. + assert(FuncInfo.ClrEHUnwindMap[State].Parent < State && + "ill-formed state numbering"); + } + // Map the main function to the NullState. + HandlerStates[&MF->front()] = NullState; + + // Write out a sentinel indicating the end of the standard (Windows) xdata + // and the start of the additional (CLR) info. + OS.EmitIntValue(0xffffffff, 4); + // Write out the number of funclets + OS.EmitIntValue(NumStates, 4); + + // Walk the machine blocks/instrs, computing and emitting a few things: + // 1. Emit a list of the offsets to each handler entry, in lexical order. + // 2. Compute a map (EndSymbolMap) from each funclet to the symbol at its end. + // 3. Compute the list of ClrClauses, in the required order (inner before + // outer, earlier before later; the order by which a forward scan with + // early termination will find the innermost enclosing clause covering + // a given address). + // 4. A map (MinClauseMap) from each handler index to the index of the + // outermost funclet/function which contains a try clause targeting the + // key handler. This will be used to determine IsDuplicate-ness when + // emitting ClrClauses. The NullState value is used to indicate that the + // top-level function contains a try clause targeting the key handler. + // HandlerStack is a stack of (PendingStartLabel, PendingState) pairs for + // try regions we entered before entering the PendingState try but which + // we haven't yet exited. + SmallVector<std::pair<const MCSymbol *, int>, 4> HandlerStack; + // EndSymbolMap and MinClauseMap are maps described above. + std::unique_ptr<MCSymbol *[]> EndSymbolMap(new MCSymbol *[NumStates]); + SmallVector<int, 4> MinClauseMap((size_t)NumStates, NumStates); + + // Visit the root function and each funclet. + + for (MachineFunction::const_iterator FuncletStart = MF->begin(), + FuncletEnd = MF->begin(), + End = MF->end(); + FuncletStart != End; FuncletStart = FuncletEnd) { + int FuncletState = HandlerStates[&*FuncletStart]; + // Find the end of the funclet + MCSymbol *EndSymbol = FuncEndSym; + while (++FuncletEnd != End) { + if (FuncletEnd->isEHFuncletEntry()) { + EndSymbol = getMCSymbolForMBB(Asm, &*FuncletEnd); + break; + } } + // Emit the function/funclet end and, if this is a funclet (and not the + // root function), record it in the EndSymbolMap. + OS.EmitValue(getOffset(EndSymbol, FuncBeginSym), 4); + if (FuncletState != NullState) { + // Record the end of the handler. + EndSymbolMap[FuncletState] = EndSymbol; + } + + // Walk the state changes in this function/funclet and compute its clauses. + // Funclets always start in the null state. 
+ const MCSymbol *CurrentStartLabel = nullptr; + int CurrentState = NullState; + assert(HandlerStack.empty()); + for (const auto &StateChange : + InvokeStateChangeIterator::range(FuncInfo, FuncletStart, FuncletEnd)) { + // Close any try regions we're not still under + int AncestorState = + getAncestor(FuncInfo, CurrentState, StateChange.NewState); + while (CurrentState != AncestorState) { + assert(CurrentState != NullState && "Failed to find ancestor!"); + // Close the pending clause + Clauses.push_back({CurrentStartLabel, StateChange.PreviousEndLabel, + CurrentState, FuncletState}); + // Now the parent handler is current + CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].Parent; + // Pop the new start label from the handler stack if we've exited all + // descendants of the corresponding handler. + if (HandlerStack.back().second == CurrentState) + CurrentStartLabel = HandlerStack.pop_back_val().first; + } + + if (StateChange.NewState != CurrentState) { + // For each clause we're starting, update the MinClauseMap so we can + // know which is the topmost funclet containing a clause targeting + // it. + for (int EnteredState = StateChange.NewState; + EnteredState != CurrentState; + EnteredState = FuncInfo.ClrEHUnwindMap[EnteredState].Parent) { + int &MinEnclosingState = MinClauseMap[EnteredState]; + if (FuncletState < MinEnclosingState) + MinEnclosingState = FuncletState; + } + // Save the previous current start/label on the stack and update to + // the newly-current start/state. + HandlerStack.emplace_back(CurrentStartLabel, CurrentState); + CurrentStartLabel = StateChange.NewStartLabel; + CurrentState = StateChange.NewState; + } + } + assert(HandlerStack.empty()); + } + + // Now emit the clause info, starting with the number of clauses. + OS.EmitIntValue(Clauses.size(), 4); + for (ClrClause &Clause : Clauses) { + // Emit a CORINFO_EH_CLAUSE : + /* + struct CORINFO_EH_CLAUSE + { + CORINFO_EH_CLAUSE_FLAGS Flags; // actually a CorExceptionFlag + DWORD TryOffset; + DWORD TryLength; // actually TryEndOffset + DWORD HandlerOffset; + DWORD HandlerLength; // actually HandlerEndOffset + union + { + DWORD ClassToken; // use for catch clauses + DWORD FilterOffset; // use for filter clauses + }; + }; + + enum CORINFO_EH_CLAUSE_FLAGS + { + CORINFO_EH_CLAUSE_NONE = 0, + CORINFO_EH_CLAUSE_FILTER = 0x0001, // This clause is for a filter + CORINFO_EH_CLAUSE_FINALLY = 0x0002, // This clause is a finally clause + CORINFO_EH_CLAUSE_FAULT = 0x0004, // This clause is a fault clause + }; + typedef enum CorExceptionFlag + { + COR_ILEXCEPTION_CLAUSE_NONE, + COR_ILEXCEPTION_CLAUSE_FILTER = 0x0001, // This is a filter clause + COR_ILEXCEPTION_CLAUSE_FINALLY = 0x0002, // This is a finally clause + COR_ILEXCEPTION_CLAUSE_FAULT = 0x0004, // This is a fault clause + COR_ILEXCEPTION_CLAUSE_DUPLICATED = 0x0008, // duplicated clause. This + // clause was duplicated + // to a funclet which was + // pulled out of line + } CorExceptionFlag; + */ + // Add 1 to the start/end of the EH clause; the IP associated with a + // call when the runtime does its scan is the IP of the next instruction + // (the one to which control will return after the call), so we need + // to add 1 to the end of the clause to cover that offset. We also add + // 1 to the start of the clause to make sure that the ranges reported + // for all clauses are disjoint. 
Note that we'll need some additional + // logic when machine traps are supported, since in that case the IP + // that the runtime uses is the offset of the faulting instruction + // itself; if such an instruction immediately follows a call but the + // two belong to different clauses, we'll need to insert a nop between + // them so the runtime can distinguish the point to which the call will + // return from the point at which the fault occurs. + + const MCExpr *ClauseBegin = + getOffsetPlusOne(Clause.StartLabel, FuncBeginSym); + const MCExpr *ClauseEnd = getOffsetPlusOne(Clause.EndLabel, FuncBeginSym); + + const ClrEHUnwindMapEntry &Entry = FuncInfo.ClrEHUnwindMap[Clause.State]; + MachineBasicBlock *HandlerBlock = Entry.Handler.get<MachineBasicBlock *>(); + MCSymbol *BeginSym = getMCSymbolForMBB(Asm, HandlerBlock); + const MCExpr *HandlerBegin = getOffset(BeginSym, FuncBeginSym); + MCSymbol *EndSym = EndSymbolMap[Clause.State]; + const MCExpr *HandlerEnd = getOffset(EndSym, FuncBeginSym); + + uint32_t Flags = 0; + switch (Entry.HandlerType) { + case ClrHandlerType::Catch: + // Leaving bits 0-2 clear indicates catch. + break; + case ClrHandlerType::Filter: + Flags |= 1; + break; + case ClrHandlerType::Finally: + Flags |= 2; + break; + case ClrHandlerType::Fault: + Flags |= 4; + break; + } + if (Clause.EnclosingState != MinClauseMap[Clause.State]) { + // This is a "duplicate" clause; the handler needs to be entered from a + // frame above the one holding the invoke. + assert(Clause.EnclosingState > MinClauseMap[Clause.State]); + Flags |= 8; + } + OS.EmitIntValue(Flags, 4); + + // Write the clause start/end + OS.EmitValue(ClauseBegin, 4); + OS.EmitValue(ClauseEnd, 4); + + // Write out the handler start/end + OS.EmitValue(HandlerBegin, 4); + OS.EmitValue(HandlerEnd, 4); + + // Write out the type token or filter offset + assert(Entry.HandlerType != ClrHandlerType::Filter && "NYI: filters"); + OS.EmitIntValue(Entry.TypeToken, 4); } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h index 669c9cc366ba..acb301016910 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h @@ -21,6 +21,7 @@ class Function; class GlobalValue; class MachineFunction; class MCExpr; +class Value; struct WinEHFuncInfo; class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { @@ -36,7 +37,14 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { /// True if this is a 64-bit target and we should use image relative offsets. bool useImageRel32 = false; - void emitCSpecificHandlerTable(); + /// Pointer to the current funclet entry BB. + const MachineBasicBlock *CurrentFuncletEntry = nullptr; + + void emitCSpecificHandlerTable(const MachineFunction *MF); + + void emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, + const MCSymbol *BeginLabel, + const MCSymbol *EndLabel, int State); /// Emit the EH table data for 32-bit and 64-bit functions using /// the __CxxFrameHandler3 personality. @@ -47,8 +55,11 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { /// tables. 
void emitExceptHandlerTable(const MachineFunction *MF); - void extendIP2StateTable(const MachineFunction *MF, const Function *ParentF, - WinEHFuncInfo &FuncInfo); + void emitCLRExceptionTable(const MachineFunction *MF); + + void computeIP2StateTable( + const MachineFunction *MF, const WinEHFuncInfo &FuncInfo, + SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable); /// Emits the label used with llvm.x86.seh.recoverfp, which is used by /// outlined funclets. @@ -57,6 +68,16 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { const MCExpr *create32bitRef(const MCSymbol *Value); const MCExpr *create32bitRef(const GlobalValue *GV); + const MCExpr *getLabelPlusOne(const MCSymbol *Label); + const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom); + const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf, + const MCSymbol *OffsetFrom); + + /// Gets the offset that we should use in a table for a stack object with the + /// given index. For targets using CFI (Win64, etc), this is relative to the + /// established SP at the end of the prologue. For targets without CFI (Win32 + /// only), it is relative to the frame pointer. + int getFrameIndexOffset(int FrameIndex, const WinEHFuncInfo &FuncInfo); public: //===--------------------------------------------------------------------===// @@ -74,6 +95,10 @@ public: /// Gather and emit post-function exception information. void endFunction(const MachineFunction *) override; + + /// \brief Emit target-specific EH funclet machinery. + void beginFunclet(const MachineBasicBlock &MBB, MCSymbol *Sym) override; + void endFunclet() override; }; } diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp index 530ab46db03b..d12fdb246984 100644 --- a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -8,10 +8,14 @@ //===----------------------------------------------------------------------===// // // This file contains a pass (at IR level) to replace atomic instructions with -// either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg. +// target specific instruction which implement the same semantics in a way +// which better fits the target backend. This can include the use of either +// (intrinsic-based) load-linked/store-conditional loops, AtomicCmpXchg, or +// type coercions. 
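The pass drives those decisions through TargetLowering hooks; this commit replaces the old hasLoadLinkedStoreConditional() query with per-instruction AtomicExpansionKind answers (visible in the hunks below). A hypothetical backend might express its choices roughly like this sketch; the class name and the width threshold are made up, only the hook signatures follow the patch:

    #include "llvm/IR/Instructions.h"
    #include "llvm/Target/TargetLowering.h"
    using namespace llvm;

    // Illustrative only: a backend that wants LL/SC loops for atomicrmw and a
    // cmpxchg-based expansion for oversized atomic loads.
    class MyTargetLowering : public TargetLowering {
    public:
      explicit MyTargetLowering(const TargetMachine &TM) : TargetLowering(TM) {}

      AtomicExpansionKind
      shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override {
        // Lower every atomicrmw to a load-linked/store-conditional loop.
        return AtomicExpansionKind::LLSC;
      }

      AtomicExpansionKind
      shouldExpandAtomicLoadInIR(LoadInst *LI) const override {
        // Loads wider than the (hypothetical) native width go via cmpxchg.
        return LI->getType()->getPrimitiveSizeInBits() > 64
                   ? AtomicExpansionKind::CmpXChg
                   : AtomicExpansionKind::None;
      }
    };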
// //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/AtomicExpandUtils.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -20,6 +24,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -44,13 +49,17 @@ namespace { private: bool bracketInstWithFences(Instruction *I, AtomicOrdering Order, bool IsStore, bool IsLoad); - bool expandAtomicLoad(LoadInst *LI); + IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); + LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI); + bool tryExpandAtomicLoad(LoadInst *LI); bool expandAtomicLoadToLL(LoadInst *LI); bool expandAtomicLoadToCmpXchg(LoadInst *LI); + StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI); bool expandAtomicStore(StoreInst *SI); bool tryExpandAtomicRMW(AtomicRMWInst *AI); - bool expandAtomicRMWToLLSC(AtomicRMWInst *AI); - bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI); + bool expandAtomicOpToLLSC( + Instruction *I, Value *Addr, AtomicOrdering MemOpOrder, + std::function<Value *(IRBuilder<> &, Value *)> PerformOp); bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); bool isIdempotentRMW(AtomicRMWInst *AI); bool simplifyIdempotentRMW(AtomicRMWInst *AI); @@ -108,7 +117,7 @@ bool AtomicExpand::runOnFunction(Function &F) { FenceOrdering = RMWI->getOrdering(); RMWI->setOrdering(Monotonic); IsStore = IsLoad = true; - } else if (CASI && !TLI->hasLoadLinkedStoreConditional() && + } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) && (isAtLeastRelease(CASI->getSuccessOrdering()) || isAtLeastAcquire(CASI->getSuccessOrdering()))) { // If a compare and swap is lowered to LL/SC, we can do smarter fence @@ -126,10 +135,28 @@ bool AtomicExpand::runOnFunction(Function &F) { } } - if (LI && TLI->shouldExpandAtomicLoadInIR(LI)) { - MadeChange |= expandAtomicLoad(LI); - } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) { - MadeChange |= expandAtomicStore(SI); + if (LI) { + if (LI->getType()->isFloatingPointTy()) { + // TODO: add a TLI hook to control this so that each target can + // convert to lowering the original type one at a time. + LI = convertAtomicLoadToIntegerType(LI); + assert(LI->getType()->isIntegerTy() && "invariant broken"); + MadeChange = true; + } + + MadeChange |= tryExpandAtomicLoad(LI); + } else if (SI) { + if (SI->getValueOperand()->getType()->isFloatingPointTy()) { + // TODO: add a TLI hook to control this so that each target can + // convert to lowering the original type one at a time. 
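The floating-point cases handled above rely on the classic bitcast idiom; the converting helpers appear later in this hunk. At the IR level the effect of the load-side rewrite is roughly the following (value names are illustrative):

    // Before the pass:
    //   %v   = load atomic float, float* %p seq_cst, align 4
    //
    // After convertAtomicLoadToIntegerType:
    //   %p.i = bitcast float* %p to i32*
    //   %v.i = load atomic i32, i32* %p.i seq_cst, align 4
    //   %v   = bitcast i32 %v.i to float

The store path is the mirror image: bitcast the value to iN, bitcast the pointer, and emit an integer atomic store with the same ordering and alignment.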
+ SI = convertAtomicStoreToIntegerType(SI); + assert(SI->getValueOperand()->getType()->isIntegerTy() && + "invariant broken"); + MadeChange = true; + } + + if (TLI->shouldExpandAtomicStoreInIR(SI)) + MadeChange |= expandAtomicStore(SI); } else if (RMWI) { // There are two different ways of expanding RMW instructions: // - into a load if it is idempotent @@ -141,7 +168,7 @@ bool AtomicExpand::runOnFunction(Function &F) { } else { MadeChange |= tryExpandAtomicRMW(RMWI); } - } else if (CASI && TLI->hasLoadLinkedStoreConditional()) { + } else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI)) { MadeChange |= expandAtomicCmpXchg(CASI); } } @@ -169,11 +196,56 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order, return (LeadingFence || TrailingFence); } -bool AtomicExpand::expandAtomicLoad(LoadInst *LI) { - if (TLI->hasLoadLinkedStoreConditional()) +/// Get the iX type with the same bitwidth as T. +IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T, + const DataLayout &DL) { + EVT VT = TLI->getValueType(DL, T); + unsigned BitWidth = VT.getStoreSizeInBits(); + assert(BitWidth == VT.getSizeInBits() && "must be a power of two"); + return IntegerType::get(T->getContext(), BitWidth); +} + +/// Convert an atomic load of a non-integral type to an integer load of the +/// equivelent bitwidth. See the function comment on +/// convertAtomicStoreToIntegerType for background. +LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { + auto *M = LI->getModule(); + Type *NewTy = getCorrespondingIntegerType(LI->getType(), + M->getDataLayout()); + + IRBuilder<> Builder(LI); + + Value *Addr = LI->getPointerOperand(); + Type *PT = PointerType::get(NewTy, + Addr->getType()->getPointerAddressSpace()); + Value *NewAddr = Builder.CreateBitCast(Addr, PT); + + auto *NewLI = Builder.CreateLoad(NewAddr); + NewLI->setAlignment(LI->getAlignment()); + NewLI->setVolatile(LI->isVolatile()); + NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope()); + DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); + + Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType()); + LI->replaceAllUsesWith(NewVal); + LI->eraseFromParent(); + return NewLI; +} + +bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { + switch (TLI->shouldExpandAtomicLoadInIR(LI)) { + case TargetLoweringBase::AtomicExpansionKind::None: + return false; + case TargetLoweringBase::AtomicExpansionKind::LLSC: + return expandAtomicOpToLLSC( + LI, LI->getPointerOperand(), LI->getOrdering(), + [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; }); + case TargetLoweringBase::AtomicExpansionKind::LLOnly: return expandAtomicLoadToLL(LI); - else + case TargetLoweringBase::AtomicExpansionKind::CmpXChg: return expandAtomicLoadToCmpXchg(LI); + } + llvm_unreachable("Unhandled case in tryExpandAtomicLoad"); } bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { @@ -184,6 +256,7 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { // to be single-copy atomic by ARM is an ldrexd (A3.5.3). Value *Val = TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering()); + TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); LI->replaceAllUsesWith(Val); LI->eraseFromParent(); @@ -209,6 +282,35 @@ bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { return true; } +/// Convert an atomic store of a non-integral type to an integer store of the +/// equivelent bitwidth. We used to not support floating point or vector +/// atomics in the IR at all. 
The backends learned to deal with the bitcast +/// idiom because that was the only way of expressing the notion of a atomic +/// float or vector store. The long term plan is to teach each backend to +/// instruction select from the original atomic store, but as a migration +/// mechanism, we convert back to the old format which the backends understand. +/// Each backend will need individual work to recognize the new format. +StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { + IRBuilder<> Builder(SI); + auto *M = SI->getModule(); + Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(), + M->getDataLayout()); + Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy); + + Value *Addr = SI->getPointerOperand(); + Type *PT = PointerType::get(NewTy, + Addr->getType()->getPointerAddressSpace()); + Value *NewAddr = Builder.CreateBitCast(Addr, PT); + + StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); + NewSI->setAlignment(SI->getAlignment()); + NewSI->setVolatile(SI->isVolatile()); + NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope()); + DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); + SI->eraseFromParent(); + return NewSI; +} + bool AtomicExpand::expandAtomicStore(StoreInst *SI) { // This function is only called on atomic stores that are too large to be // atomic if implemented as a native store. So we replace them by an @@ -226,23 +328,15 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { return tryExpandAtomicRMW(AI); } -bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { - switch (TLI->shouldExpandAtomicRMWInIR(AI)) { - case TargetLoweringBase::AtomicRMWExpansionKind::None: - return false; - case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: { - assert(TLI->hasLoadLinkedStoreConditional() && - "TargetLowering requested we expand AtomicRMW instruction into " - "load-linked/store-conditional combos, but such instructions aren't " - "supported"); - - return expandAtomicRMWToLLSC(AI); - } - case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: { - return expandAtomicRMWToCmpXchg(AI); - } - } - llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); +static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, + Value *Loaded, Value *NewVal, + AtomicOrdering MemOpOrder, + Value *&Success, Value *&NewLoaded) { + Value* Pair = Builder.CreateAtomicCmpXchg( + Addr, Loaded, NewVal, MemOpOrder, + AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); + Success = Builder.CreateExtractValue(Pair, 1, "success"); + NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); } /// Emit IR to implement the given atomicrmw operation on values in registers, @@ -282,10 +376,28 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, } } -bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { - AtomicOrdering MemOpOrder = AI->getOrdering(); - Value *Addr = AI->getPointerOperand(); - BasicBlock *BB = AI->getParent(); +bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { + switch (TLI->shouldExpandAtomicRMWInIR(AI)) { + case TargetLoweringBase::AtomicExpansionKind::None: + return false; + case TargetLoweringBase::AtomicExpansionKind::LLSC: + return expandAtomicOpToLLSC(AI, AI->getPointerOperand(), AI->getOrdering(), + [&](IRBuilder<> &Builder, Value *Loaded) { + return performAtomicOp(AI->getOperation(), + Builder, Loaded, + AI->getValOperand()); + }); + case TargetLoweringBase::AtomicExpansionKind::CmpXChg: + return expandAtomicRMWToCmpXchg(AI, 
createCmpXchgInstFun); + default: + llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); + } +} + +bool AtomicExpand::expandAtomicOpToLLSC( + Instruction *I, Value *Addr, AtomicOrdering MemOpOrder, + std::function<Value *(IRBuilder<> &, Value *)> PerformOp) { + BasicBlock *BB = I->getParent(); Function *F = BB->getParent(); LLVMContext &Ctx = F->getContext(); @@ -303,11 +415,11 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { // atomicrmw.end: // fence? // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); + BasicBlock *ExitBB = BB->splitBasicBlock(I->getIterator(), "atomicrmw.end"); BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - // This grabs the DebugLoc from AI. - IRBuilder<> Builder(AI); + // This grabs the DebugLoc from I. + IRBuilder<> Builder(I); // The split call above "helpfully" added a branch at the end of BB (to the // wrong place), but we might want a fence too. It's easiest to just remove @@ -320,8 +432,7 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { Builder.SetInsertPoint(LoopBB); Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); - Value *NewVal = - performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); + Value *NewVal = PerformOp(Builder, Loaded); Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); @@ -331,72 +442,8 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - AI->replaceAllUsesWith(Loaded); - AI->eraseFromParent(); - - return true; -} - -bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) { - AtomicOrdering MemOpOrder = - AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); - Value *Addr = AI->getPointerOperand(); - BasicBlock *BB = AI->getParent(); - Function *F = BB->getParent(); - LLVMContext &Ctx = F->getContext(); - - // Given: atomicrmw some_op iN* %addr, iN %incr ordering - // - // The standard expansion we produce is: - // [...] - // %init_loaded = load atomic iN* %addr - // br label %loop - // loop: - // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] - // %new = some_op iN %loaded, %incr - // %pair = cmpxchg iN* %addr, iN %loaded, iN %new - // %new_loaded = extractvalue { iN, i1 } %pair, 0 - // %success = extractvalue { iN, i1 } %pair, 1 - // br i1 %success, label %atomicrmw.end, label %loop - // atomicrmw.end: - // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); - BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - - // This grabs the DebugLoc from AI. - IRBuilder<> Builder(AI); - - // The split call above "helpfully" added a branch at the end of BB (to the - // wrong place), but we want a load. It's easiest to just remove - // the branch entirely. - std::prev(BB->end())->eraseFromParent(); - Builder.SetInsertPoint(BB); - LoadInst *InitLoaded = Builder.CreateLoad(Addr); - // Atomics require at least natural alignment. - InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits()); - Builder.CreateBr(LoopBB); - - // Start the main loop block now that we've taken care of the preliminaries. 
- Builder.SetInsertPoint(LoopBB); - PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); - Loaded->addIncoming(InitLoaded, BB); - - Value *NewVal = - performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); - - Value *Pair = Builder.CreateAtomicCmpXchg( - Addr, Loaded, NewVal, MemOpOrder, - AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); - Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); - Loaded->addIncoming(NewLoaded, LoopBB); - - Value *Success = Builder.CreateExtractValue(Pair, 1, "success"); - Builder.CreateCondBr(Success, ExitBB, LoopBB); - - Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - - AI->replaceAllUsesWith(NewLoaded); - AI->eraseFromParent(); + I->replaceAllUsesWith(Loaded); + I->eraseFromParent(); return true; } @@ -424,7 +471,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // %loaded = @load.linked(%addr) // %should_store = icmp eq %loaded, %desired // br i1 %should_store, label %cmpxchg.trystore, - // label %cmpxchg.failure + // label %cmpxchg.nostore // cmpxchg.trystore: // %stored = @store_conditional(%new, %addr) // %success = icmp eq i32 %stored, 0 @@ -432,6 +479,9 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // cmpxchg.success: // fence? // br label %cmpxchg.end + // cmpxchg.nostore: + // @load_linked_fail_balance()? + // br label %cmpxchg.failure // cmpxchg.failure: // fence? // br label %cmpxchg.end @@ -440,9 +490,10 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); + BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end"); auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); - auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB); + auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB); + auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB); auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB); auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); @@ -466,7 +517,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). - Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); + Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); Builder.SetInsertPoint(TryStoreBB); Value *StoreSuccess = TLI->emitStoreConditional( @@ -482,6 +533,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { /*IsLoad=*/true); Builder.CreateBr(ExitBB); + Builder.SetInsertPoint(NoStoreBB); + // In the failing case, where we don't execute the store-conditional, the + // target might want to balance out the load-linked with a dedicated + // instruction (e.g., on ARM, clearing the exclusive monitor). 
+ TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); + Builder.CreateBr(FailureBB); + Builder.SetInsertPoint(FailureBB); TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true, /*IsLoad=*/true); @@ -556,9 +614,77 @@ bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) { bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { - if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad)) - expandAtomicLoad(ResultingLoad); + tryExpandAtomicLoad(ResultingLoad); return true; } return false; } + +bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, + CreateCmpXchgInstFun CreateCmpXchg) { + assert(AI); + + AtomicOrdering MemOpOrder = + AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); + Value *Addr = AI->getPointerOperand(); + BasicBlock *BB = AI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: atomicrmw some_op iN* %addr, iN %incr ordering + // + // The standard expansion we produce is: + // [...] + // %init_loaded = load atomic iN* %addr + // br label %loop + // loop: + // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] + // %new = some_op iN %loaded, %incr + // %pair = cmpxchg iN* %addr, iN %loaded, iN %new + // %new_loaded = extractvalue { iN, i1 } %pair, 0 + // %success = extractvalue { iN, i1 } %pair, 1 + // br i1 %success, label %atomicrmw.end, label %loop + // atomicrmw.end: + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(AI->getIterator(), "atomicrmw.end"); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + + // This grabs the DebugLoc from AI. + IRBuilder<> Builder(AI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we want a load. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + LoadInst *InitLoaded = Builder.CreateLoad(Addr); + // Atomics require at least natural alignment. + InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. 
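The loop construction continues just below. The point of hoisting this into a free function declared in the new AtomicExpandUtils.h header is that callers outside this pass can plug in their own cmpxchg emission strategy. A hypothetical caller, mirroring the createCmpXchgInstFun shape shown earlier (the wrapper function itself is illustrative, not part of the patch), might look like:

    #include "llvm/CodeGen/AtomicExpandUtils.h"
    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Illustrative only: expand an atomicrmw through the new utility, emitting
    // a plain cmpxchg in the loop body.
    static bool lowerRMWViaCmpXchg(AtomicRMWInst *AI) {
      return expandAtomicRMWToCmpXchg(
          AI, [](IRBuilder<> &Builder, Value *Addr, Value *Loaded, Value *NewVal,
                 AtomicOrdering MemOpOrder, Value *&Success, Value *&NewLoaded) {
            Value *Pair = Builder.CreateAtomicCmpXchg(
                Addr, Loaded, NewVal, MemOpOrder,
                AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
            Success = Builder.CreateExtractValue(Pair, 1, "success");
            NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
          });
    }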
+ Builder.SetInsertPoint(LoopBB); + PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); + Loaded->addIncoming(InitLoaded, BB); + + Value *NewVal = + performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); + + Value *NewLoaded = nullptr; + Value *Success = nullptr; + + CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder, + Success, NewLoaded); + assert(Success && NewLoaded); + + Loaded->addIncoming(NewLoaded, LoopBB); + + Builder.CreateCondBr(Success, ExitBB, LoopBB); + + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + + AI->replaceAllUsesWith(NewLoaded); + AI->eraseFromParent(); + + return true; +} diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp index db00910cd018..a67e194356d8 100644 --- a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -33,6 +33,6 @@ cl::opt<unsigned> cl::desc("Threshold for partial unrolling"), cl::Hidden); -BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, Function &F) +BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index 618266731c06..604feeddd355 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -12,7 +12,8 @@ // it then removes. // // Note that this pass must be run after register allocation, it cannot handle -// SSA form. +// SSA form. It also must handle virtual registers for targets that emit virtual +// ISA (e.g. NVPTX). // //===----------------------------------------------------------------------===// @@ -20,6 +21,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -95,7 +97,7 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { // TailMerge can create jump into if branches that make CFG irreducible for // HW that requires structurized CFG. bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && - PassConfig->getEnableTailMerge(); + PassConfig->getEnableTailMerge(); BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, getAnalysis<MachineBlockFrequencyInfo>(), getAnalysis<MachineBranchProbabilityInfo>()); @@ -132,6 +134,7 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { // Remove the block. 
MF->erase(MBB); + FuncletMembership.erase(MBB); } /// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def @@ -150,9 +153,13 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { if (!I->isImplicitDef()) break; unsigned Reg = I->getOperand(0).getReg(); - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - ImpDefRegs.insert(*SubRegs); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + ImpDefRegs.insert(*SubRegs); + } else { + ImpDefRegs.insert(Reg); + } ++I; } if (ImpDefRegs.empty()) @@ -163,8 +170,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { if (!TII->isUnpredicatedTerminator(I)) return false; // See if it uses any of the implicitly defined registers. - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - MachineOperand &MO = I->getOperand(i); + for (const MachineOperand &MO : I->operands()) { if (!MO.isReg() || !MO.isUse()) continue; unsigned Reg = MO.getReg(); @@ -208,14 +214,17 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, // Fix CFG. The later algorithms expect it to be right. bool MadeChange = false; - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) { - MachineBasicBlock *MBB = I, *TBB = nullptr, *FBB = nullptr; + for (MachineBasicBlock &MBB : MF) { + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; - if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true)) - MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); - MadeChange |= OptimizeImpDefsBlock(MBB); + if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, true)) + MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); + MadeChange |= OptimizeImpDefsBlock(&MBB); } + // Recalculate funclet membership. + FuncletMembership = getFuncletMembership(MF); + bool MadeChangeThisIteration = true; while (MadeChangeThisIteration) { MadeChangeThisIteration = TailMergeBlocks(MF); @@ -235,12 +244,9 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, // Walk the function to find jump tables that are live. BitVector JTIsLive(JTI->getJumpTables().size()); - for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); - BB != E; ++BB) { - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E; ++I) - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { - MachineOperand &Op = I->getOperand(op); + for (const MachineBasicBlock &BB : MF) { + for (const MachineInstr &I : BB) + for (const MachineOperand &Op : I.operands()) { if (!Op.isJTI()) continue; // Remember that this JT is live. @@ -365,7 +371,7 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, } // Back past possible debugging pseudos at beginning of block. This matters // when one block differs from the other only by whether debugging pseudos - // are present at the beginning. (This way, the various checks later for + // are present at the beginning. (This way, the various checks later for // I1==MBB1->begin() work as expected.) if (I1 == MBB1->begin() && I2 != MBB2->begin()) { --I2; @@ -426,7 +432,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, MachineFunction &MF = *CurMBB.getParent(); // Create the fall-through block. 
- MachineFunction::iterator MBBI = &CurMBB; + MachineFunction::iterator MBBI = CurMBB.getIterator(); MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB); CurMBB.getParent()->insert(++MBBI, NewMBB); @@ -445,6 +451,11 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // For targets that use the register scavenger, we must maintain LiveIns. MaintainLiveIns(&CurMBB, NewMBB); + // Add the new block to the funclet. + const auto &FuncletI = FuncletMembership.find(&CurMBB); + if (FuncletI != FuncletMembership.end()) + FuncletMembership[NewMBB] = FuncletI->second; + return NewMBB; } @@ -479,7 +490,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, DebugLoc dl; // FIXME: this is nowhere if (I != MF->end() && !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) { - MachineBasicBlock *NextBB = I; + MachineBasicBlock *NextBB = &*I; if (TBB == NextBB && !Cond.empty() && !FBB) { if (!TII->ReverseBranchCondition(Cond)) { TII->RemoveBranch(*CurMBB); @@ -549,14 +560,23 @@ static unsigned CountTerminators(MachineBasicBlock *MBB, /// and decide if it would be profitable to merge those tails. Return the /// length of the common tail and iterators to the first common instruction /// in each block. -static bool ProfitableToMerge(MachineBasicBlock *MBB1, - MachineBasicBlock *MBB2, - unsigned minCommonTailLength, - unsigned &CommonTailLen, - MachineBasicBlock::iterator &I1, - MachineBasicBlock::iterator &I2, - MachineBasicBlock *SuccBB, - MachineBasicBlock *PredBB) { +static bool +ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, + unsigned minCommonTailLength, unsigned &CommonTailLen, + MachineBasicBlock::iterator &I1, + MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB, + DenseMap<const MachineBasicBlock *, int> &FuncletMembership) { + // It is never profitable to tail-merge blocks from two different funclets. + if (!FuncletMembership.empty()) { + auto Funclet1 = FuncletMembership.find(MBB1); + assert(Funclet1 != FuncletMembership.end()); + auto Funclet2 = FuncletMembership.find(MBB2); + assert(Funclet2 != FuncletMembership.end()); + if (Funclet1->second != Funclet2->second) + return false; + } + CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2); if (CommonTailLen == 0) return false; @@ -600,12 +620,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // branch instruction, which is likely to be smaller than the 2 // instructions that would be deleted in the merge. 
MachineFunction *MF = MBB1->getParent(); - if (EffectiveTailLen >= 2 && - MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize) && - (I1 == MBB1->begin() || I2 == MBB2->begin())) - return true; - - return false; + return EffectiveTailLen >= 2 && MF->getFunction()->optForSize() && + (I1 == MBB1->begin() || I2 == MBB2->begin()); } /// ComputeSameTails - Look through all the blocks in MergePotentials that have @@ -634,7 +650,8 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash, if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(), minCommonTailLength, CommonTailLen, TrialBBI1, TrialBBI2, - SuccBB, PredBB)) { + SuccBB, PredBB, + FuncletMembership)) { if (CommonTailLen > maxCommonTailLength) { SameTails.clear(); maxCommonTailLength = CommonTailLen; @@ -776,7 +793,7 @@ removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, if (MBBICommon->mayLoad() || MBBICommon->mayStore()) if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon)) - MBBICommon->clearMemRefs(); + MBBICommon->dropMemRefs(); ++MBBI; ++MBBICommon; @@ -840,8 +857,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // block, which we can't jump to), we can treat all blocks with this same // tail at once. Use PredBB if that is one of the possibilities, as that // will not introduce any extra branches. - MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()-> - getParent()->begin(); + MachineBasicBlock *EntryBB = + &MergePotentials.front().getBlock()->getParent()->front(); unsigned commonTailIndex = SameTails.size(); // If there are two blocks, check to see if one can be made to fall through // into the other. @@ -917,12 +934,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // First find blocks with no successors. MergePotentials.clear(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); - I != E && MergePotentials.size() < TailMergeThreshold; ++I) { - if (TriedMerging.count(I)) - continue; - if (I->succ_empty()) - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I)); + for (MachineBasicBlock &MBB : MF) { + if (MergePotentials.size() == TailMergeThreshold) + break; + if (!TriedMerging.count(&MBB) && MBB.succ_empty()) + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(&MBB), &MBB)); } // If this is a large problem, avoid visiting the same basic blocks @@ -958,13 +974,13 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { I != E; ++I) { if (I->pred_size() < 2) continue; SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; - MachineBasicBlock *IBB = I; - MachineBasicBlock *PredBB = std::prev(I); + MachineBasicBlock *IBB = &*I; + MachineBasicBlock *PredBB = &*std::prev(I); MergePotentials.clear(); - for (MachineBasicBlock::pred_iterator P = I->pred_begin(), - E2 = I->pred_end(); - P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) { - MachineBasicBlock *PBB = *P; + for (MachineBasicBlock *PBB : I->predecessors()) { + if (MergePotentials.size() == TailMergeThreshold) + break; + if (TriedMerging.count(PBB)) continue; @@ -977,7 +993,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { continue; // Skip blocks which may jump to a landing pad. Can't tail merge these. 
- if (PBB->getLandingPadSuccessor()) + if (PBB->hasEHPadSuccessor()) continue; MachineBasicBlock *TBB = nullptr, *FBB = nullptr; @@ -990,18 +1006,21 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (TII->ReverseBranchCondition(NewCond)) continue; // This is the QBB case described above - if (!FBB) - FBB = std::next(MachineFunction::iterator(PBB)); + if (!FBB) { + auto Next = ++PBB->getIterator(); + if (Next != MF.end()) + FBB = &*Next; + } } // Failing case: the only way IBB can be reached from PBB is via // exception handling. Happens for landing pads. Would be nice to have // a bit in the edge so we didn't have to do all this. - if (IBB->isLandingPad()) { - MachineFunction::iterator IP = PBB; IP++; + if (IBB->isEHPad()) { + MachineFunction::iterator IP = ++PBB->getIterator(); MachineBasicBlock *PredNextBB = nullptr; if (IP != MF.end()) - PredNextBB = IP; + PredNextBB = &*IP; if (!TBB) { if (IBB != PredNextBB) // fallthrough continue; @@ -1027,7 +1046,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { NewCond, dl); } - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), PBB)); } } @@ -1042,7 +1061,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Reinsert an unconditional branch if needed. The 1 below can occur as a // result of removing blocks in TryTailMergeBlocks. - PredBB = std::prev(I); // this may have been changed in TryTailMergeBlocks + PredBB = &*std::prev(I); // this may have been changed in TryTailMergeBlocks if (MergePotentials.size() == 1 && MergePotentials.begin()->getBlock() != PredBB) FixTail(MergePotentials.begin()->getBlock(), IBB, TII); @@ -1080,13 +1099,19 @@ void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) { if (TailMBB.succ_size() <= 1) return; - auto MaxEdgeFreq = *std::max_element(EdgeFreqLs.begin(), EdgeFreqLs.end()); - uint64_t Scale = MaxEdgeFreq.getFrequency() / UINT32_MAX + 1; + auto SumEdgeFreq = + std::accumulate(EdgeFreqLs.begin(), EdgeFreqLs.end(), BlockFrequency(0)) + .getFrequency(); auto EdgeFreq = EdgeFreqLs.begin(); - for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end(); - SuccI != SuccE; ++SuccI, ++EdgeFreq) - TailMBB.setSuccWeight(SuccI, EdgeFreq->getFrequency() / Scale); + if (SumEdgeFreq > 0) { + for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end(); + SuccI != SuccE; ++SuccI, ++EdgeFreq) { + auto Prob = BranchProbability::getBranchProbability( + EdgeFreq->getFrequency(), SumEdgeFreq); + TailMBB.setSuccProbability(SuccI, Prob); + } + } } //===----------------------------------------------------------------------===// @@ -1098,10 +1123,12 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { // Make sure blocks are numbered in order MF.RenumberBlocks(); + // Renumbering blocks alters funclet membership, recalculate it. + FuncletMembership = getFuncletMembership(MF); for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); I != E; ) { - MachineBasicBlock *MBB = I++; + MachineBasicBlock *MBB = &*I++; MadeChange |= OptimizeBlock(MBB); // If it is dead, remove it. 
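For the setCommonTailEdgeWeights change above (raw successor weights replaced by frequencies normalized into BranchProbability values), the arithmetic with made-up edge frequencies of 6 and 2 is simply:

    #include "llvm/Support/BranchProbability.h"
    #include <cstdint>
    using namespace llvm;

    // Illustrative numbers only: two successor edges with frequencies 6 and 2.
    uint64_t SumEdgeFreq = 6 + 2; // 8
    BranchProbability P0 = BranchProbability::getBranchProbability(6, SumEdgeFreq); // 6/8
    BranchProbability P1 = BranchProbability::getBranchProbability(2, SumEdgeFreq); // 2/8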
@@ -1111,6 +1138,7 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { ++NumDeadBlocks; } } + return MadeChange; } @@ -1167,20 +1195,31 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { MachineFunction &MF = *MBB->getParent(); ReoptimizeBlock: - MachineFunction::iterator FallThrough = MBB; + MachineFunction::iterator FallThrough = MBB->getIterator(); ++FallThrough; + // Make sure MBB and FallThrough belong to the same funclet. + bool SameFunclet = true; + if (!FuncletMembership.empty() && FallThrough != MF.end()) { + auto MBBFunclet = FuncletMembership.find(MBB); + assert(MBBFunclet != FuncletMembership.end()); + auto FallThroughFunclet = FuncletMembership.find(&*FallThrough); + assert(FallThroughFunclet != FuncletMembership.end()); + SameFunclet = MBBFunclet->second == FallThroughFunclet->second; + } + // If this block is empty, make everyone use its fall-through, not the block // explicitly. Landing pads should not do this since the landing-pad table // points to this block. Blocks with their addresses taken shouldn't be // optimized away. - if (IsEmptyBlock(MBB) && !MBB->isLandingPad() && !MBB->hasAddressTaken()) { + if (IsEmptyBlock(MBB) && !MBB->isEHPad() && !MBB->hasAddressTaken() && + SameFunclet) { // Dead block? Leave for cleanup later. if (MBB->pred_empty()) return MadeChange; if (FallThrough == MF.end()) { // TODO: Simplify preds to not branch here if possible! - } else if (FallThrough->isLandingPad()) { + } else if (FallThrough->isEHPad()) { // Don't rewrite to a landing pad fallthough. That could lead to the case // where a BB jumps to more than one landing pad. // TODO: Is it ever worth rewriting predecessors which don't already @@ -1190,12 +1229,12 @@ ReoptimizeBlock: // instead. while (!MBB->pred_empty()) { MachineBasicBlock *Pred = *(MBB->pred_end()-1); - Pred->ReplaceUsesOfBlockWith(MBB, FallThrough); + Pred->ReplaceUsesOfBlockWith(MBB, &*FallThrough); } // If MBB was the target of a jump table, update jump tables to go to the // fallthrough instead. if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) - MJTI->ReplaceMBBInJumpTables(MBB, FallThrough); + MJTI->ReplaceMBBInJumpTables(MBB, &*FallThrough); MadeChange = true; } return MadeChange; @@ -1237,7 +1276,7 @@ ReoptimizeBlock: // AnalyzeBranch. if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 && PrevBB.succ_size() == 1 && - !MBB->hasAddressTaken() && !MBB->isLandingPad()) { + !MBB->hasAddressTaken() && !MBB->isEHPad()) { DEBUG(dbgs() << "\nMerging into block: " << PrevBB << "From MBB: " << *MBB); // Remove redundant DBG_VALUEs first. @@ -1333,7 +1372,7 @@ ReoptimizeBlock: TII->InsertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl); // Move this block to the end of the function. - MBB->moveAfter(--MF.end()); + MBB->moveAfter(&MF.back()); MadeChange = true; ++NumBranchOpts; return MadeChange; @@ -1371,7 +1410,7 @@ ReoptimizeBlock: // other blocks across it. if (CurTBB && CurCond.empty() && !CurFBB && IsBranchOnlyBlock(MBB) && CurTBB != MBB && - !MBB->hasAddressTaken()) { + !MBB->hasAddressTaken() && !MBB->isEHPad()) { DebugLoc dl = getBranchDebugLoc(*MBB); // This block may contain just an unconditional branch. Because there can // be 'non-branch terminators' in the block, try removing the branch and @@ -1468,14 +1507,11 @@ ReoptimizeBlock: // see if it has a fall-through into its successor. bool CurFallsThru = MBB->canFallThrough(); - if (!MBB->isLandingPad()) { + if (!MBB->isEHPad()) { // Check all the predecessors of this block. 
If one of them has no fall // throughs, move this block right after it. - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - E = MBB->pred_end(); PI != E; ++PI) { + for (MachineBasicBlock *PredBB : MBB->predecessors()) { // Analyze the branch at the end of the pred. - MachineBasicBlock *PredBB = *PI; - MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough; MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector<MachineOperand, 4> PredCond; if (PredBB != MBB && !PredBB->canFallThrough() && @@ -1493,8 +1529,7 @@ ReoptimizeBlock: // B elsewhere // next: if (CurFallsThru) { - MachineBasicBlock *NextBB = - std::next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = &*std::next(MBB->getIterator()); CurCond.clear(); TII->InsertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc()); } @@ -1507,11 +1542,9 @@ ReoptimizeBlock: if (!CurFallsThru) { // Check all successors to see if we can move this block before it. - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - E = MBB->succ_end(); SI != E; ++SI) { + for (MachineBasicBlock *SuccBB : MBB->successors()) { // Analyze the branch at the end of the block before the succ. - MachineBasicBlock *SuccBB = *SI; - MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev; + MachineFunction::iterator SuccPrev = --SuccBB->getIterator(); // If this block doesn't already fall-through to that successor, and if // the succ doesn't already have a block that can fall through into it, @@ -1519,7 +1552,7 @@ ReoptimizeBlock: // fallthrough to happen. if (SuccBB != MBB && &*SuccPrev != MBB && !SuccPrev->canFallThrough() && !CurUnAnalyzable && - !SuccBB->isLandingPad()) { + !SuccBB->isEHPad()) { MBB->moveBefore(SuccBB); MadeChange = true; goto ReoptimizeBlock; @@ -1531,10 +1564,18 @@ ReoptimizeBlock: // removed, move this block to the end of the function. MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr; SmallVector<MachineOperand, 4> PrevCond; + // We're looking for cases where PrevBB could possibly fall through to + // FallThrough, but if FallThrough is an EH pad that wouldn't be useful + // so here we skip over any EH pads so we might have a chance to find + // a branch target from PrevBB. + while (FallThrough != MF.end() && FallThrough->isEHPad()) + ++FallThrough; + // Now check to see if the current block is sitting between PrevBB and + // a block to which it could fall through. if (FallThrough != MF.end() && !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && - PrevBB.isSuccessor(FallThrough)) { - MBB->moveAfter(--MF.end()); + PrevBB.isSuccessor(&*FallThrough)) { + MBB->moveAfter(&MF.back()); MadeChange = true; return MadeChange; } @@ -1553,7 +1594,7 @@ ReoptimizeBlock: bool BranchFolder::HoistCommonCode(MachineFunction &MF) { bool MadeChange = false; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) { - MachineBasicBlock *MBB = I++; + MachineBasicBlock *MBB = &*I++; MadeChange |= HoistCommonCodeInSuccs(MBB); } @@ -1564,15 +1605,23 @@ bool BranchFolder::HoistCommonCode(MachineFunction &MF) { /// its 'true' successor. 
static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, MachineBasicBlock *TrueBB) { - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - E = BB->succ_end(); SI != E; ++SI) { - MachineBasicBlock *SuccBB = *SI; + for (MachineBasicBlock *SuccBB : BB->successors()) if (SuccBB != TrueBB) return SuccBB; - } return nullptr; } +template <class Container> +static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI, + Container &Set) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Set.insert(*AI); + } else { + Set.insert(Reg); + } +} + /// findHoistingInsertPosAndDeps - Find the location to move common instructions /// in successors to. The location is usually just before the terminator, /// however if the terminator is a conditional branch and its previous @@ -1590,16 +1639,14 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, if (!TII->isUnpredicatedTerminator(Loc)) return MBB->end(); - for (unsigned i = 0, e = Loc->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = Loc->getOperand(i); + for (const MachineOperand &MO : Loc->operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; if (MO.isUse()) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Uses.insert(*AI); + addRegAndItsAliases(Reg, TRI, Uses); } else { if (!MO.isDead()) // Don't try to hoist code in the rare case the terminator defines a @@ -1608,8 +1655,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // If the terminator defines a register, make sure we don't hoist // the instruction whose def might be clobbered by the terminator. - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Defs.insert(*AI); + addRegAndItsAliases(Reg, TRI, Defs); } } @@ -1626,8 +1672,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, --PI; bool IsDef = false; - for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) { - const MachineOperand &MO = PI->getOperand(i); + for (const MachineOperand &MO : PI->operands()) { // If PI has a regmask operand, it is probably a call. Separate away. if (MO.isRegMask()) return Loc; @@ -1636,8 +1681,10 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, unsigned Reg = MO.getReg(); if (!Reg) continue; - if (Uses.count(Reg)) + if (Uses.count(Reg)) { IsDef = true; + break; + } } if (!IsDef) // The condition setting instruction is not just before the conditional @@ -1657,23 +1704,22 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // Find out what registers are live. Note this routine is ignoring other live // registers which are only used by instructions in successor blocks. 
- for (unsigned i = 0, e = PI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = PI->getOperand(i); + for (const MachineOperand &MO : PI->operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; if (MO.isUse()) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Uses.insert(*AI); + addRegAndItsAliases(Reg, TRI, Uses); } else { if (Uses.erase(Reg)) { - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - Uses.erase(*SubRegs); // Use sub-registers to be conservative + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + Uses.erase(*SubRegs); // Use sub-registers to be conservative + } } - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Defs.insert(*AI); + addRegAndItsAliases(Reg, TRI, Defs); } } @@ -1737,8 +1783,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { break; bool IsSafe = true; - for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { - MachineOperand &MO = TIB->getOperand(i); + for (MachineOperand &MO : TIB->operands()) { // Don't attempt to hoist instructions with register masks. if (MO.isRegMask()) { IsSafe = false; @@ -1793,28 +1838,29 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { break; // Remove kills from LocalDefsSet, these registers had short live ranges. - for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { - MachineOperand &MO = TIB->getOperand(i); + for (const MachineOperand &MO : TIB->operands()) { if (!MO.isReg() || !MO.isUse() || !MO.isKill()) continue; unsigned Reg = MO.getReg(); if (!Reg || !LocalDefsSet.count(Reg)) continue; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - LocalDefsSet.erase(*AI); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + LocalDefsSet.erase(*AI); + } else { + LocalDefsSet.erase(Reg); + } } // Track local defs so we can update liveins. 
- for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { - MachineOperand &MO = TIB->getOperand(i); + for (const MachineOperand &MO : TIB->operands()) { if (!MO.isReg() || !MO.isDef() || MO.isDead()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; LocalDefs.push_back(Reg); - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - LocalDefsSet.insert(*AI); + addRegAndItsAliases(Reg, TRI, LocalDefsSet); } HasDups = true; diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h index 46c05dc0600a..d759d53e27f2 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.h +++ b/contrib/llvm/lib/CodeGen/BranchFolding.h @@ -54,6 +54,7 @@ namespace llvm { typedef std::vector<MergePotentialsElt>::iterator MPIterator; std::vector<MergePotentialsElt> MergePotentials; SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging; + DenseMap<const MachineBasicBlock *, int> FuncletMembership; class SameTailElt { MPIterator MPIter; diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp index d08fae09323c..abc655ac34ca 100644 --- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -24,6 +25,7 @@ using namespace llvm; void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, MachineFunction &MF, + VirtRegMap *VRM, const MachineLoopInfo &MLI, const MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo::NormalizingFn norm) { @@ -31,7 +33,7 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, << "********** Function: " << MF.getName() << '\n'); MachineRegisterInfo &MRI = MF.getRegInfo(); - VirtRegAuxInfo VRAI(MF, LIS, MLI, MBFI, norm); + VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm); for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI.reg_nodbg_empty(Reg)) @@ -74,7 +76,10 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg, // Check if all values in LI are rematerializable static bool isRematerializable(const LiveInterval &LI, const LiveIntervals &LIS, + VirtRegMap *VRM, const TargetInstrInfo &TII) { + unsigned Reg = LI.reg; + unsigned Original = VRM ? VRM->getOriginal(Reg) : 0; for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); I != E; ++I) { const VNInfo *VNI = *I; @@ -86,6 +91,36 @@ static bool isRematerializable(const LiveInterval &LI, MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); assert(MI && "Dead valno in interval"); + // Trace copies introduced by live range splitting. The inline + // spiller can rematerialize through these copies, so the spill + // weight must reflect this. + if (VRM) { + while (MI->isFullCopy()) { + // The copy destination must match the interval register. + if (MI->getOperand(0).getReg() != Reg) + return false; + + // Get the source register. + Reg = MI->getOperand(1).getReg(); + + // If the original (pre-splitting) registers match this + // copy came from a split. + if (!TargetRegisterInfo::isVirtualRegister(Reg) || + VRM->getOriginal(Reg) != Original) + return false; + + // Follow the copy live-in value. 
+ const LiveInterval &SrcLI = LIS.getInterval(Reg); + LiveQueryResult SrcQ = SrcLI.Query(VNI->def); + VNI = SrcQ.valueIn(); + assert(VNI && "Copy from non-existing value"); + if (VNI->isPHIDef()) + return false; + MI = LIS.getInstructionFromIndex(VNI->def); + assert(MI && "Dead valno in interval"); + } + } + if (!TII.isTriviallyReMaterializable(MI, LIS.getAliasAnalysis())) return false; } @@ -188,7 +223,7 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { // it is a preferred candidate for spilling. // FIXME: this gets much more complicated once we support non-trivial // re-materialization. - if (isRematerializable(li, LIS, *MF.getSubtarget().getInstrInfo())) + if (isRematerializable(li, LIS, VRM, *MF.getSubtarget().getInstrInfo())) totalWeight *= 0.5F; li.weight = normalize(totalWeight, li.getSize(), numInstr); diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp index fb29b1db7a43..23c0d542560e 100644 --- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp @@ -32,6 +32,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, CallOrPrologue(Unknown) { // No stack is used. StackOffset = 0; + MaxStackArgAlign = 1; clearByValRegsInfo(); UsedRegs.resize((TRI.getNumRegs()+31)/32); @@ -192,6 +193,7 @@ static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) { void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs, MVT VT, CCAssignFn Fn) { unsigned SavedStackOffset = StackOffset; + unsigned SavedMaxStackArgAlign = MaxStackArgAlign; unsigned NumLocs = Locs.size(); // Set the 'inreg' flag if it is used for this calling convention. @@ -223,6 +225,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs, // as allocated so that future queries don't return the same registers, i.e. // when i64 and f64 are both passed in GPRs. 
StackOffset = SavedStackOffset; + MaxStackArgAlign = SavedMaxStackArgAlign; Locs.resize(NumLocs); } diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp index 155c5ecec772..dc13b5b11d30 100644 --- a/contrib/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -29,6 +29,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeExpandISelPseudosPass(Registry); initializeExpandPostRAPass(Registry); initializeFinalizeMachineBundlesPass(Registry); + initializeFuncletLayoutPass(Registry); initializeGCMachineCodeAnalysisPass(Registry); initializeGCModuleInfoPass(Registry); initializeIfConverterPass(Registry); @@ -66,6 +67,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeSlotIndexesPass(Registry); initializeStackColoringPass(Registry); initializeStackMapLivenessPass(Registry); + initializeLiveDebugValuesPass(Registry); initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); initializeTailDuplicatePassPass(Registry); diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index 6ab6acc03722..5844124d8565 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -63,6 +64,9 @@ STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " "computations were sunk"); STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumAndsAdded, + "Number of and mask instructions added to form ext loads"); +STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized"); STATISTIC(NumRetsDup, "Number of return instructions duplicated"); STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); @@ -109,25 +113,18 @@ static cl::opt<bool> StressExtLdPromotion( namespace { typedef SmallPtrSet<Instruction *, 16> SetOfInstrs; -struct TypeIsSExt { - Type *Ty; - bool IsSExt; - TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {} -}; +typedef PointerIntPair<Type *, 1, bool> TypeIsSExt; typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy; class TypePromotionTransaction; class CodeGenPrepare : public FunctionPass { - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// transformation profitability. const TargetMachine *TM; const TargetLowering *TLI; const TargetTransformInfo *TTI; const TargetLibraryInfo *TLInfo; - /// CurInstIterator - As we scan instructions optimizing them, this is the - /// next instruction to optimize. Xforms that can invalidate this should - /// update it. + /// As we scan instructions optimizing them, this is the next instruction + /// to optimize. Transforms that can invalidate this should update it. BasicBlock::iterator CurInstIterator; /// Keeps track of non-local addresses that have been sunk into a block. @@ -141,10 +138,10 @@ class TypePromotionTransaction; /// promotion for the current function. InstrToOrigTy PromotedInsts; - /// ModifiedDT - If CFG is modified in anyway. + /// True if CFG is modified in any way. 
bool ModifiedDT; - /// OptSize - True if optimizing for size. + /// True if optimizing for size. bool OptSize; /// DataLayout for the Function being processed. @@ -167,30 +164,33 @@ class TypePromotionTransaction; } private: - bool EliminateFallThrough(Function &F); - bool EliminateMostlyEmptyBlocks(Function &F); - bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; - void EliminateMostlyEmptyBlock(BasicBlock *BB); - bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT); - bool OptimizeInst(Instruction *I, bool& ModifiedDT); - bool OptimizeMemoryInst(Instruction *I, Value *Addr, + bool eliminateFallThrough(Function &F); + bool eliminateMostlyEmptyBlocks(Function &F); + bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; + void eliminateMostlyEmptyBlock(BasicBlock *BB); + bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT); + bool optimizeInst(Instruction *I, bool& ModifiedDT); + bool optimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy, unsigned AS); - bool OptimizeInlineAsmInst(CallInst *CS); - bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT); - bool MoveExtToFormExtLoad(Instruction *&I); - bool OptimizeExtUses(Instruction *I); - bool OptimizeSelectInst(SelectInst *SI); - bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI); - bool OptimizeExtractElementInst(Instruction *Inst); - bool DupRetToEnableTailCallOpts(BasicBlock *BB); - bool PlaceDbgValues(Function &F); + bool optimizeInlineAsmInst(CallInst *CS); + bool optimizeCallInst(CallInst *CI, bool& ModifiedDT); + bool moveExtToFormExtLoad(Instruction *&I); + bool optimizeExtUses(Instruction *I); + bool optimizeLoadExt(LoadInst *I); + bool optimizeSelectInst(SelectInst *SI); + bool optimizeShuffleVectorInst(ShuffleVectorInst *SI); + bool optimizeSwitchInst(SwitchInst *CI); + bool optimizeExtractElementInst(Instruction *Inst); + bool dupRetToEnableTailCallOpts(BasicBlock *BB); + bool placeDbgValues(Function &F); bool sinkAndCmp(Function &F); - bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, + bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, const SmallVectorImpl<Instruction *> &Exts, unsigned CreatedInstCost); bool splitBranchCondition(Function &F); bool simplifyOffsetableRelocate(Instruction &I); + void stripInvariantGroupMetadata(Instruction &I); }; } @@ -218,7 +218,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLI = TM->getSubtargetImpl(F)->getTargetLowering(); TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - OptSize = F.hasFnAttribute(Attribute::OptimizeForSize); + OptSize = F.optForSize(); /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. @@ -231,12 +231,12 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Eliminate blocks that contain only PHI nodes and an // unconditional branch. - EverMadeChange |= EliminateMostlyEmptyBlocks(F); + EverMadeChange |= eliminateMostlyEmptyBlocks(F); // llvm.dbg.value is far away from the value then iSel may not be able // handle it properly. iSel will drop llvm.dbg.value if it can not // find a node corresponding to the value. 
- EverMadeChange |= PlaceDbgValues(F); + EverMadeChange |= placeDbgValues(F); // If there is a mask, compare against zero, and branch that can be combined // into a single target instruction, push the mask and compare into branch @@ -251,9 +251,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) { while (MadeChange) { MadeChange = false; for (Function::iterator I = F.begin(); I != F.end(); ) { - BasicBlock *BB = I++; + BasicBlock *BB = &*I++; bool ModifiedDTOnIteration = false; - MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration); + MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration); // Restart BB iteration if the dominator tree of the Function was changed if (ModifiedDTOnIteration) @@ -296,7 +296,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Merge pairs of basic blocks with unconditional branches, connected by // a single edge. if (EverMadeChange || MadeChange) - MadeChange |= EliminateFallThrough(F); + MadeChange |= eliminateFallThrough(F); EverMadeChange |= MadeChange; } @@ -314,14 +314,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) { return EverMadeChange; } -/// EliminateFallThrough - Merge basic blocks which are connected -/// by a single edge, where one of the basic blocks has a single successor -/// pointing to the other basic block, which has a single predecessor. -bool CodeGenPrepare::EliminateFallThrough(Function &F) { +/// Merge basic blocks which are connected by a single edge, where one of the +/// basic blocks has a single successor pointing to the other basic block, +/// which has a single predecessor. +bool CodeGenPrepare::eliminateFallThrough(Function &F) { bool Changed = false; // Scan all of the blocks in the function, except for the entry block. for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { - BasicBlock *BB = I++; + BasicBlock *BB = &*I++; // If the destination block has a single pred, then this is a trivial // edge, just collapse it. BasicBlock *SinglePred = BB->getSinglePredecessor(); @@ -342,22 +342,21 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) { BB->moveBefore(&BB->getParent()->getEntryBlock()); // We have erased a block. Update the iterator. - I = BB; + I = BB->getIterator(); } } return Changed; } -/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes, -/// debug info directives, and an unconditional branch. Passes before isel -/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for -/// isel. Start by eliminating these blocks so we can split them the way we -/// want them. -bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) { +/// Eliminate blocks that contain only PHI nodes, debug info directives, and an +/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split +/// edges in ways that are non-optimal for isel. Start by eliminating these +/// blocks so we can split them the way we want them. +bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { bool MadeChange = false; // Note that this intentionally skips the entry block. for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { - BasicBlock *BB = I++; + BasicBlock *BB = &*I++; // If this block doesn't end with an uncond branch, ignore it. BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); @@ -366,7 +365,7 @@ bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) { // If the instruction before the branch (skipping debug info) isn't a phi // node, then other stuff is happening here. 
- BasicBlock::iterator BBI = BI; + BasicBlock::iterator BBI = BI->getIterator(); if (BBI != BB->begin()) { --BBI; while (isa<DbgInfoIntrinsic>(BBI)) { @@ -383,19 +382,19 @@ bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) { if (DestBB == BB) continue; - if (!CanMergeBlocks(BB, DestBB)) + if (!canMergeBlocks(BB, DestBB)) continue; - EliminateMostlyEmptyBlock(BB); + eliminateMostlyEmptyBlock(BB); MadeChange = true; } return MadeChange; } -/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a -/// single uncond branch between them, and BB contains no other non-phi +/// Return true if we can merge BB into DestBB if there is a single +/// unconditional branch between them, and BB contains no other non-phi /// instructions. -bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB, +bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const { // We only want to eliminate blocks whose phi nodes are used by phi nodes in // the successor. If there are more complex condition (e.g. preheaders), @@ -461,9 +460,9 @@ bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB, } -/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and -/// an unconditional branch in it. -void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { +/// Eliminate a basic block that has only phi's and an unconditional branch in +/// it. +void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); BasicBlock *DestBB = BI->getSuccessor(0); @@ -594,6 +593,14 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase, continue; } + if (RelocatedBase->getParent() != ToReplace->getParent()) { + // Base and derived relocates are in different basic blocks. + // In this case transform is only valid when base dominates derived + // relocate. However it would be too expensive to check dominance + // for each such relocate, so we skip the whole transformation. + continue; + } + Value *Base = ThisRelocate.getBasePtr(); auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr()); if (!Derived || Derived->getPointerOperand() != Base) @@ -631,21 +638,20 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase, // In this case, we can not find the bitcast any more. So we insert a new bitcast // no matter there is already one or not. In this way, we can handle all cases, and // the extra bitcast should be optimized away in later passes. - Instruction *ActualRelocatedBase = RelocatedBase; + Value *ActualRelocatedBase = RelocatedBase; if (RelocatedBase->getType() != Base->getType()) { ActualRelocatedBase = - cast<Instruction>(Builder.CreateBitCast(RelocatedBase, Base->getType())); + Builder.CreateBitCast(RelocatedBase, Base->getType()); } Value *Replacement = Builder.CreateGEP( Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV)); - Instruction *ReplacementInst = cast<Instruction>(Replacement); Replacement->takeName(ToReplace); // If the newly generated derived pointer's type does not match the original derived // pointer's type, cast the new derived pointer to match it. Same reasoning as above. 
- Instruction *ActualReplacement = ReplacementInst; - if (ReplacementInst->getType() != ToReplace->getType()) { + Value *ActualReplacement = Replacement; + if (Replacement->getType() != ToReplace->getType()) { ActualReplacement = - cast<Instruction>(Builder.CreateBitCast(ReplacementInst, ToReplace->getType())); + Builder.CreateBitCast(Replacement, ToReplace->getType()); } ToReplace->replaceAllUsesWith(ActualReplacement); ToReplace->eraseFromParent(); @@ -723,6 +729,12 @@ static bool SinkCast(CastInst *CI) { // Preincrement use iterator so we don't invalidate it. ++UI; + // If the block selected to receive the cast is an EH pad that does not + // allow non-PHI instructions before the terminator, we can't sink the + // cast. + if (UserBB->getTerminator()->isEHPad()) + continue; + // If this user is in the same block as the cast, don't change the cast. if (UserBB == DefBB) continue; @@ -731,9 +743,9 @@ static bool SinkCast(CastInst *CI) { if (!InsertedCast) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); - InsertedCast = - CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", - InsertPt); + assert(InsertPt != UserBB->end()); + InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), + CI->getType(), "", &*InsertPt); } // Replace a use of the cast with a use of the new cast. @@ -751,10 +763,9 @@ static bool SinkCast(CastInst *CI) { return MadeChange; } -/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop -/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), -/// sink it into user blocks to reduce the number of virtual -/// registers that must be created and coalesced. +/// If the specified cast instruction is a noop copy (e.g. it's casting from +/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to +/// reduce the number of virtual registers that must be created and coalesced. /// /// Return true if any changes are made. /// @@ -789,8 +800,8 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, return SinkCast(CI); } -/// CombineUAddWithOverflow - try to combine CI into a call to the -/// llvm.uadd.with.overflow intrinsic if possible. +/// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if +/// possible. /// /// Return true if any changes were made. static bool CombineUAddWithOverflow(CmpInst *CI) { @@ -818,7 +829,7 @@ static bool CombineUAddWithOverflow(CmpInst *CI) { assert(*AddI->user_begin() == CI && "expected!"); #endif - Module *M = CI->getParent()->getParent()->getParent(); + Module *M = CI->getModule(); Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); auto *InsertPt = AddI->hasOneUse() ? CI : AddI; @@ -836,16 +847,16 @@ static bool CombineUAddWithOverflow(CmpInst *CI) { return true; } -/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce -/// the number of virtual registers that must be created and coalesced. This is -/// a clear win except on targets with multiple condition code registers -/// (PowerPC), where it might lose; some adjustment may be wanted there. +/// Sink the given CmpInst into user blocks to reduce the number of virtual +/// registers that must be created and coalesced. This is a clear win except on +/// targets with multiple condition code registers (PowerPC), where it might +/// lose; some adjustment may be wanted there. /// /// Return true if any changes are made. 
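// --- A tiny source-level analogue of the sinking described above
// (illustrative only; not LLVM code): rather than materializing the compare
// once and keeping the i1 live into every user block, each user block
// re-creates it, which is what SinkCmpExpression arranges at the IR level.
#include <cstdio>

static void compareHoisted(int X, int Y, bool P) {
  bool C = X < Y;                                // C stays live across the branch
  if (P) std::printf("then %d\n", (int)C);
  else   std::printf("else %d\n", (int)C);
}

static void compareSunk(int X, int Y, bool P) {
  if (P) std::printf("then %d\n", (int)(X < Y)); // compare redone per block
  else   std::printf("else %d\n", (int)(X < Y));
}

int main() {
  compareHoisted(1, 2, true);
  compareSunk(1, 2, false);
}
// --- (end of sketch)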
static bool SinkCmpExpression(CmpInst *CI) { BasicBlock *DefBB = CI->getParent(); - /// InsertedCmp - Only insert a cmp in each block once. + /// Only insert a cmp in each block once. DenseMap<BasicBlock*, CmpInst*> InsertedCmps; bool MadeChange = false; @@ -872,10 +883,10 @@ static bool SinkCmpExpression(CmpInst *CI) { if (!InsertedCmp) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + assert(InsertPt != UserBB->end()); InsertedCmp = - CmpInst::Create(CI->getOpcode(), - CI->getPredicate(), CI->getOperand(0), - CI->getOperand(1), "", InsertPt); + CmpInst::Create(CI->getOpcode(), CI->getPredicate(), + CI->getOperand(0), CI->getOperand(1), "", &*InsertPt); } // Replace a use of the cmp with a use of the new cmp. @@ -903,8 +914,8 @@ static bool OptimizeCmpExpression(CmpInst *CI) { return false; } -/// isExtractBitsCandidateUse - Check if the candidates could -/// be combined with shift instruction, which includes: +/// Check if the candidates could be combined with a shift instruction, which +/// includes: /// 1. Truncate instruction /// 2. And instruction and the imm is a mask of the low bits: /// imm & (imm+1) == 0 @@ -922,8 +933,7 @@ static bool isExtractBitsCandidateUse(Instruction *User) { return true; } -/// SinkShiftAndTruncate - sink both shift and truncate instruction -/// to the use of truncate's BB. +/// Sink both shift and truncate instruction to the use of truncate's BB. static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts, @@ -970,20 +980,22 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, if (!InsertedShift && !InsertedTrunc) { BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt(); + assert(InsertPt != TruncUserBB->end()); // Sink the shift if (ShiftI->getOpcode() == Instruction::AShr) - InsertedShift = - BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); + InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, + "", &*InsertPt); else - InsertedShift = - BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); + InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, + "", &*InsertPt); // Sink the trunc BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); TruncInsertPt++; + assert(TruncInsertPt != TruncUserBB->end()); InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, - TruncI->getType(), "", TruncInsertPt); + TruncI->getType(), "", &*TruncInsertPt); MadeChange = true; @@ -993,10 +1005,10 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, return MadeChange; } -/// OptimizeExtractBits - sink the shift *right* instruction into user blocks if -/// the uses could potentially be combined with this shift instruction and -/// generate BitExtract instruction. It will only be applied if the architecture -/// supports BitExtract instruction. Here is an example: +/// Sink the shift *right* instruction into user blocks if the uses could +/// potentially be combined with this shift instruction and generate BitExtract +/// instruction. It will only be applied if the architecture supports BitExtract +/// instruction. 
Here is an example: /// BB1: /// %x.extract.shift = lshr i64 %arg1, 32 /// BB2: @@ -1067,13 +1079,14 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, if (!InsertedShift) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + assert(InsertPt != UserBB->end()); if (ShiftI->getOpcode() == Instruction::AShr) - InsertedShift = - BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); + InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, + "", &*InsertPt); else - InsertedShift = - BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); + InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, + "", &*InsertPt); MadeChange = true; } @@ -1089,10 +1102,10 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, return MadeChange; } -// ScalarizeMaskedLoad() translates masked load intrinsic, like +// Translate a masked load intrinsic like // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, // <16 x i1> %mask, <16 x i32> %passthru) -// to a chain of basic blocks, whith loading element one-by-one if +// to a chain of basic blocks, with loading element one-by-one if // the appropriate mask bit is set // // %1 = bitcast i8* %addr to i32* @@ -1126,35 +1139,68 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, // static void ScalarizeMaskedLoad(CallInst *CI) { Value *Ptr = CI->getArgOperand(0); - Value *Src0 = CI->getArgOperand(3); + Value *Alignment = CI->getArgOperand(1); Value *Mask = CI->getArgOperand(2); - VectorType *VecType = dyn_cast<VectorType>(CI->getType()); - Type *EltTy = VecType->getElementType(); + Value *Src0 = CI->getArgOperand(3); + unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); + VectorType *VecType = dyn_cast<VectorType>(CI->getType()); assert(VecType && "Unexpected return type of masked load intrinsic"); + Type *EltTy = CI->getType()->getVectorElementType(); + IRBuilder<> Builder(CI->getContext()); Instruction *InsertPt = CI; BasicBlock *IfBlock = CI->getParent(); BasicBlock *CondBlock = nullptr; BasicBlock *PrevIfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); + Builder.SetInsertPoint(InsertPt); Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + // Short-cut if the mask is all-true. + bool IsAllOnesMask = isa<Constant>(Mask) && + cast<Constant>(Mask)->isAllOnesValue(); + + if (IsAllOnesMask) { + Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + + // Adjust alignment for the scalar instruction. 
+ AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits()/8); // Bitcast %addr fron i8* to EltTy* Type *NewPtrType = EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace()); Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); + unsigned VectorWidth = VecType->getNumElements(); + Value *UndefVal = UndefValue::get(VecType); // The result vector Value *VResult = UndefVal; + if (isa<ConstantVector>(Mask)) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + LoadInst* Load = Builder.CreateAlignedLoad(Gep, AlignVal); + VResult = Builder.CreateInsertElement(VResult, Load, + Builder.getInt32(Idx)); + } + Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + PHINode *Phi = nullptr; Value *PrevPhi = UndefVal; - unsigned VectorWidth = VecType->getNumElements(); for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration @@ -1182,16 +1228,17 @@ static void ScalarizeMaskedLoad(CallInst *CI) { // %Elt = load i32* %EltAddr // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx // - CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); + CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load"); Builder.SetInsertPoint(InsertPt); Value *Gep = Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - LoadInst* Load = Builder.CreateLoad(Gep, false); + LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal); VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); + BasicBlock *NewIfBlock = + CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); Builder.SetInsertPoint(InsertPt); Instruction *OldBr = IfBlock->getTerminator(); BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); @@ -1208,7 +1255,7 @@ static void ScalarizeMaskedLoad(CallInst *CI) { CI->eraseFromParent(); } -// ScalarizeMaskedStore() translates masked store intrinsic, like +// Translate a masked store intrinsic, like // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align, // <16 x i1> %mask) // to a chain of basic blocks, that stores element one-by-one if @@ -1237,34 +1284,61 @@ static void ScalarizeMaskedLoad(CallInst *CI) { // br label %else2 // . . . static void ScalarizeMaskedStore(CallInst *CI) { - Value *Ptr = CI->getArgOperand(1); Value *Src = CI->getArgOperand(0); + Value *Ptr = CI->getArgOperand(1); + Value *Alignment = CI->getArgOperand(2); Value *Mask = CI->getArgOperand(3); + unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); VectorType *VecType = dyn_cast<VectorType>(Src->getType()); - Type *EltTy = VecType->getElementType(); - assert(VecType && "Unexpected data type in masked store intrinsic"); + Type *EltTy = VecType->getElementType(); + IRBuilder<> Builder(CI->getContext()); Instruction *InsertPt = CI; BasicBlock *IfBlock = CI->getParent(); Builder.SetInsertPoint(InsertPt); Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + // Short-cut if the mask is all-true. 
+ bool IsAllOnesMask = isa<Constant>(Mask) && + cast<Constant>(Mask)->isAllOnesValue(); + + if (IsAllOnesMask) { + Builder.CreateAlignedStore(Src, Ptr, AlignVal); + CI->eraseFromParent(); + return; + } + + // Adjust alignment for the scalar instruction. + AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits()/8); // Bitcast %addr fron i8* to EltTy* Type *NewPtrType = EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace()); Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); - unsigned VectorWidth = VecType->getNumElements(); + + if (isa<ConstantVector>(Mask)) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + Builder.CreateAlignedStore(OneElt, Gep, AlignVal); + } + CI->eraseFromParent(); + return; + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // // %mask_1 = extractelement <16 x i1> %mask, i32 Idx // %to_store = icmp eq i1 %mask_1, true - // br i1 %to_load, label %cond.store, label %else + // br i1 %to_store, label %cond.store, label %else // Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, @@ -1276,13 +1350,146 @@ static void ScalarizeMaskedStore(CallInst *CI) { // %EltAddr = getelementptr i32* %1, i32 0 // %store i32 %OneElt, i32* %EltAddr // - BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); + BasicBlock *CondBlock = + IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); Builder.SetInsertPoint(InsertPt); - + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); Value *Gep = Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - Builder.CreateStore(OneElt, Gep); + Builder.CreateAlignedStore(OneElt, Gep, AlignVal); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = + CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + IfBlock = NewIfBlock; + } + CI->eraseFromParent(); +} + +// Translate a masked gather intrinsic like +// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4, +// <16 x i1> %Mask, <16 x i32> %Src) +// to a chain of basic blocks, with loading element one-by-one if +// the appropriate mask bit is set +// +// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind +// % Mask0 = extractelement <16 x i1> %Mask, i32 0 +// % ToLoad0 = icmp eq i1 % Mask0, true +// br i1 % ToLoad0, label %cond.load, label %else +// +// cond.load: +// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 +// % Load0 = load i32, i32* % Ptr0, align 4 +// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0 +// br label %else +// +// else: +// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0] +// % Mask1 = extractelement <16 x i1> %Mask, i32 1 +// % ToLoad1 = icmp eq i1 % Mask1, true +// br i1 % ToLoad1, label %cond.load1, label %else2 +// +// cond.load1: +// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 +// % Load1 = load i32, i32* % Ptr1, align 4 +// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1 +// br label %else2 +// . . . 
+// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src +// ret <16 x i32> %Result +static void ScalarizeMaskedGather(CallInst *CI) { + Value *Ptrs = CI->getArgOperand(0); + Value *Alignment = CI->getArgOperand(1); + Value *Mask = CI->getArgOperand(2); + Value *Src0 = CI->getArgOperand(3); + + VectorType *VecType = dyn_cast<VectorType>(CI->getType()); + + assert(VecType && "Unexpected return type of masked load intrinsic"); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + BasicBlock *CondBlock = nullptr; + BasicBlock *PrevIfBlock = CI->getParent(); + Builder.SetInsertPoint(InsertPt); + unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); + + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + Value *UndefVal = UndefValue::get(VecType); + + // The result vector + Value *VResult = UndefVal; + unsigned VectorWidth = VecType->getNumElements(); + + // Shorten the way if the mask is a vector of constants. + bool IsConstMask = isa<ConstantVector>(Mask); + + if (IsConstMask) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, + "Load" + Twine(Idx)); + VResult = Builder.CreateInsertElement(VResult, Load, + Builder.getInt32(Idx), + "Res" + Twine(Idx)); + } + Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + + PHINode *Phi = nullptr; + Value *PrevPhi = UndefVal; + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + + // Fill the "else" block, created in the previous iteration + // + // %Mask1 = extractelement <16 x i1> %Mask, i32 1 + // %ToLoad1 = icmp eq i1 %Mask1, true + // br i1 %ToLoad1, label %cond.load, label %else + // + if (Idx > 0) { + Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + PrevPhi = Phi; + VResult = Phi; + } + + Value *Predicate = Builder.CreateExtractElement(Mask, + Builder.getInt32(Idx), + "Mask" + Twine(Idx)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1), + "ToLoad" + Twine(Idx)); + + // Create "cond" block + // + // %EltAddr = getelementptr i32* %1, i32 0 + // %Elt = load i32* %EltAddr + // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx + // + CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); + Builder.SetInsertPoint(InsertPt); + + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, + "Load" + Twine(Idx)); + VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx), + "Res" + Twine(Idx)); // Create "else" block, fill it in the next iteration BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); @@ -1290,12 +1497,204 @@ static void ScalarizeMaskedStore(CallInst *CI) { Instruction *OldBr = IfBlock->getTerminator(); BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); OldBr->eraseFromParent(); + PrevIfBlock = IfBlock; IfBlock = NewIfBlock; } + + Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); + 
CI->replaceAllUsesWith(NewI); CI->eraseFromParent(); } -bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { +// Translate a masked scatter intrinsic, like +// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4, +// <16 x i1> %Mask) +// to a chain of basic blocks, that stores element one-by-one if +// the appropriate mask bit is set. +// +// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind +// % Mask0 = extractelement <16 x i1> % Mask, i32 0 +// % ToStore0 = icmp eq i1 % Mask0, true +// br i1 %ToStore0, label %cond.store, label %else +// +// cond.store: +// % Elt0 = extractelement <16 x i32> %Src, i32 0 +// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 +// store i32 %Elt0, i32* % Ptr0, align 4 +// br label %else +// +// else: +// % Mask1 = extractelement <16 x i1> % Mask, i32 1 +// % ToStore1 = icmp eq i1 % Mask1, true +// br i1 % ToStore1, label %cond.store1, label %else2 +// +// cond.store1: +// % Elt1 = extractelement <16 x i32> %Src, i32 1 +// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 +// store i32 % Elt1, i32* % Ptr1, align 4 +// br label %else2 +// . . . +static void ScalarizeMaskedScatter(CallInst *CI) { + Value *Src = CI->getArgOperand(0); + Value *Ptrs = CI->getArgOperand(1); + Value *Alignment = CI->getArgOperand(2); + Value *Mask = CI->getArgOperand(3); + + assert(isa<VectorType>(Src->getType()) && + "Unexpected data type in masked scatter intrinsic"); + assert(isa<VectorType>(Ptrs->getType()) && + isa<PointerType>(Ptrs->getType()->getVectorElementType()) && + "Vector of pointers is expected in masked scatter intrinsic"); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + Builder.SetInsertPoint(InsertPt); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); + unsigned VectorWidth = Src->getType()->getVectorNumElements(); + + // Shorten the way if the mask is a vector of constants. 
+ bool IsConstMask = isa<ConstantVector>(Mask); + + if (IsConstMask) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), + "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); + } + CI->eraseFromParent(); + return; + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + // Fill the "else" block, created in the previous iteration + // + // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx + // % ToStore = icmp eq i1 % Mask1, true + // br i1 % ToStore, label %cond.store, label %else + // + Value *Predicate = Builder.CreateExtractElement(Mask, + Builder.getInt32(Idx), + "Mask" + Twine(Idx)); + Value *Cmp = + Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1), + "ToStore" + Twine(Idx)); + + // Create "cond" block + // + // % Elt1 = extractelement <16 x i32> %Src, i32 1 + // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 + // %store i32 % Elt1, i32* % Ptr1 + // + BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); + Builder.SetInsertPoint(InsertPt); + + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), + "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + IfBlock = NewIfBlock; + } + CI->eraseFromParent(); +} + +/// If counting leading or trailing zeros is an expensive operation and a zero +/// input is defined, add a check for zero to avoid calling the intrinsic. +/// +/// We want to transform: +/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false) +/// +/// into: +/// entry: +/// %cmpz = icmp eq i64 %A, 0 +/// br i1 %cmpz, label %cond.end, label %cond.false +/// cond.false: +/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true) +/// br label %cond.end +/// cond.end: +/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ] +/// +/// If the transform is performed, return true and set ModifiedDT to true. +static bool despeculateCountZeros(IntrinsicInst *CountZeros, + const TargetLowering *TLI, + const DataLayout *DL, + bool &ModifiedDT) { + if (!TLI || !DL) + return false; + + // If a zero input is undefined, it doesn't make sense to despeculate that. + if (match(CountZeros->getOperand(1), m_One())) + return false; + + // If it's cheap to speculate, there's nothing to do. + auto IntrinsicID = CountZeros->getIntrinsicID(); + if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) || + (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz())) + return false; + + // Only handle legal scalar cases. Anything else requires too much work. + Type *Ty = CountZeros->getType(); + unsigned SizeInBits = Ty->getPrimitiveSizeInBits(); + if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSize()) + return false; + + // The intrinsic will be sunk behind a compare against zero and branch. 
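// --- Standalone sketch of what the guard built below amounts to at the
// source level for a 64-bit cttz (assuming the target reports cttz as
// expensive to speculate): test for zero first, and only evaluate the
// "zero-input-is-undef" form on the non-zero path.
#include <cassert>
#include <cstdint>

static unsigned guardedCttz64(std::uint64_t X) {
  if (X == 0)
    return 64;                            // the PHI's bit-width incoming value
  return (unsigned)__builtin_ctzll(X);    // safe: X is known non-zero here
}

int main() {
  assert(guardedCttz64(0) == 64);
  assert(guardedCttz64(8) == 3);
  assert(guardedCttz64(1ULL << 63) == 63);
}
// --- (end of sketch)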
+ BasicBlock *StartBlock = CountZeros->getParent(); + BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false"); + + // Create another block after the count zero intrinsic. A PHI will be added + // in this block to select the result of the intrinsic or the bit-width + // constant if the input to the intrinsic is zero. + BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros)); + BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end"); + + // Set up a builder to create a compare, conditional branch, and PHI. + IRBuilder<> Builder(CountZeros->getContext()); + Builder.SetInsertPoint(StartBlock->getTerminator()); + Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc()); + + // Replace the unconditional branch that was created by the first split with + // a compare against zero and a conditional branch. + Value *Zero = Constant::getNullValue(Ty); + Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz"); + Builder.CreateCondBr(Cmp, EndBlock, CallBlock); + StartBlock->getTerminator()->eraseFromParent(); + + // Create a PHI in the end block to select either the output of the intrinsic + // or the bit width of the operand. + Builder.SetInsertPoint(&EndBlock->front()); + PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz"); + CountZeros->replaceAllUsesWith(PN); + Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits)); + PN->addIncoming(BitWidth, StartBlock); + PN->addIncoming(CountZeros, CallBlock); + + // We are explicitly handling the zero case, so we can set the intrinsic's + // undefined zero argument to 'true'. This will also prevent reprocessing the + // intrinsic; we only despeculate when a zero input is defined. + CountZeros->setArgOperand(1, Builder.getTrue()); + ModifiedDT = true; + return true; +} + +bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { BasicBlock *BB = CI->getParent(); // Lower inline assembly if we can. @@ -1311,7 +1710,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { return true; } // Sink address computing for memory operands into the block. - if (OptimizeInlineAsmInst(CI)) + if (optimizeInlineAsmInst(CI)) return true; } @@ -1372,14 +1771,14 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { // Substituting this can cause recursive simplifications, which can // invalidate our iterator. Use a WeakVH to hold onto it in case this // happens. - WeakVH IterHandle(CurInstIterator); + WeakVH IterHandle(&*CurInstIterator); replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); // If the iterator instruction was recursively deleted, start over at the // start of the block. 
- if (IterHandle != CurInstIterator) { + if (IterHandle != CurInstIterator.getNodePtrUnchecked()) { CurInstIterator = BB->begin(); SunkAddrs.clear(); } @@ -1387,7 +1786,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { } case Intrinsic::masked_load: { // Scalarize unsupported vector masked load - if (!TTI->isLegalMaskedLoad(CI->getType(), 1)) { + if (!TTI->isLegalMaskedLoad(CI->getType())) { ScalarizeMaskedLoad(CI); ModifiedDT = true; return true; @@ -1395,13 +1794,29 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { return false; } case Intrinsic::masked_store: { - if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), 1)) { + if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) { ScalarizeMaskedStore(CI); ModifiedDT = true; return true; } return false; } + case Intrinsic::masked_gather: { + if (!TTI->isLegalMaskedGather(CI->getType())) { + ScalarizeMaskedGather(CI); + ModifiedDT = true; + return true; + } + return false; + } + case Intrinsic::masked_scatter: { + if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) { + ScalarizeMaskedScatter(CI); + ModifiedDT = true; + return true; + } + return false; + } case Intrinsic::aarch64_stlxr: case Intrinsic::aarch64_stxr: { ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0)); @@ -1415,6 +1830,15 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { InsertedInsts.insert(ExtVal); return true; } + case Intrinsic::invariant_group_barrier: + II->replaceAllUsesWith(II->getArgOperand(0)); + II->eraseFromParent(); + return true; + + case Intrinsic::cttz: + case Intrinsic::ctlz: + // If counting zeros is expensive, try to avoid it. + return despeculateCountZeros(II, TLI, DL, ModifiedDT); } if (TLI) { @@ -1426,7 +1850,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { Type *AccessTy; if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace)) while (!PtrOps.empty()) - if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace)) + if (optimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace)) return true; } } @@ -1447,9 +1871,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { return false; } -/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return -/// instructions to the predecessor to enable tail call optimizations. The -/// case it is currently looking for is: +/// Look for opportunities to duplicate return instructions to the predecessor +/// to enable tail call optimizations. The case it is currently looking for is: /// @code /// bb0: /// %tmp0 = tail call i32 @f0() @@ -1478,7 +1901,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { /// %tmp2 = tail call i32 @f2() /// ret i32 %tmp2 /// @endcode -bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { +bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { if (!TLI) return false; @@ -1597,7 +2020,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { namespace { -/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode +/// This is an extended version of TargetLowering::AddrMode /// which holds actual Value*'s for register values. struct ExtAddrMode : public TargetLowering::AddrMode { Value *BaseReg; @@ -1709,10 +2132,10 @@ class TypePromotionTransaction { public: /// \brief Record the position of \p Inst. 
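// --- Self-contained analogue (std::list stands in for a basic block's
// instruction list; nothing here is LLVM API) of the position-recording idea
// used by InsertionHandler below: remember either the previous element or the
// fact that the element was first, so it can be put back in the same spot.
#include <cassert>
#include <list>

int main() {
  std::list<int> BB = {1, 2, 3};
  auto It = std::next(BB.begin());        // pretend this is the instruction
  bool HasPrev = It != BB.begin();
  auto Prev = HasPrev ? std::prev(It) : BB.end();

  int Inst = *It;
  BB.erase(It);                           // temporarily remove it

  // ... other transformations run here ...

  if (HasPrev)
    BB.insert(std::next(Prev), Inst);     // re-insert after the recorded prev
  else
    BB.push_front(Inst);                  // it was the first "instruction"
  assert((BB == std::list<int>{1, 2, 3}));
}
// --- (end of sketch)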
InsertionHandler(Instruction *Inst) { - BasicBlock::iterator It = Inst; + BasicBlock::iterator It = Inst->getIterator(); HasPrevInstruction = (It != (Inst->getParent()->begin())); if (HasPrevInstruction) - Point.PrevInst = --It; + Point.PrevInst = &*--It; else Point.BB = Inst->getParent(); } @@ -1724,7 +2147,7 @@ class TypePromotionTransaction { Inst->removeFromParent(); Inst->insertAfter(Point.PrevInst); } else { - Instruction *Position = Point.BB->getFirstInsertionPt(); + Instruction *Position = &*Point.BB->getFirstInsertionPt(); if (Inst->getParent()) Inst->moveBefore(Position); else @@ -1797,7 +2220,7 @@ class TypePromotionTransaction { Value *Val = Inst->getOperand(It); OriginalValues.push_back(Val); // Set a dummy one. - // We could use OperandSetter here, but that would implied an overhead + // We could use OperandSetter here, but that would imply an overhead // that we are not willing to pay. Inst->setOperand(It, UndefValue::get(Val->getType())); } @@ -2111,7 +2534,7 @@ class AddressingModeMatcher { unsigned AddrSpace; Instruction *MemoryInst; - /// AddrMode - This is the addressing mode that we're building up. This is + /// This is the addressing mode that we're building up. This is /// part of the return value of this addressing mode matching stuff. ExtAddrMode &AddrMode; @@ -2122,9 +2545,8 @@ class AddressingModeMatcher { /// The ongoing transaction where every action should be registered. TypePromotionTransaction &TPT; - /// IgnoreProfitability - This is set to true when we should not do - /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode - /// always returns true. + /// This is set to true when we should not do profitability checks. + /// When true, IsProfitableToFoldIntoAddressingMode always returns true. bool IgnoreProfitability; AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI, @@ -2143,7 +2565,7 @@ class AddressingModeMatcher { } public: - /// Match - Find the maximal addressing mode that a load/store of V can fold, + /// Find the maximal addressing mode that a load/store of V can fold, /// give an access type of AccessTy. This returns a list of involved /// instructions in AddrModeInsts. /// \p InsertedInsts The instructions inserted by other CodeGenPrepare @@ -2161,32 +2583,32 @@ public: bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS, MemoryInst, Result, InsertedInsts, - PromotedInsts, TPT).MatchAddr(V, 0); + PromotedInsts, TPT).matchAddr(V, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); return Result; } private: - bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); - bool MatchAddr(Value *V, unsigned Depth); - bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, + bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); + bool matchAddr(Value *V, unsigned Depth); + bool matchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, bool *MovedAway = nullptr); - bool IsProfitableToFoldIntoAddressingMode(Instruction *I, + bool isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter); - bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); - bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost, + bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); + bool isPromotionProfitable(unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const; }; -/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. 
+/// Try adding ScaleReg*Scale to the current addressing mode. /// Return true and update AddrMode if this addr mode is legal for the target, /// false if not. -bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, +bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth) { // If Scale is 1, then this is the same as adding ScaleReg to the addressing // mode. Just process that directly. if (Scale == 1) - return MatchAddr(ScaleReg, Depth); + return matchAddr(ScaleReg, Depth); // If the scale is 0, it takes nothing to add this. if (Scale == 0) @@ -2233,9 +2655,9 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, return true; } -/// MightBeFoldableInst - This is a little filter, which returns true if an -/// addressing computation involving I might be folded into a load/store -/// accessing it. This doesn't need to be perfect, but needs to accept at least +/// This is a little filter, which returns true if an addressing computation +/// involving I might be folded into a load/store accessing it. +/// This doesn't need to be perfect, but needs to accept at least /// the set of instructions that MatchOperationAddr can. static bool MightBeFoldableInst(Instruction *I) { switch (I->getOpcode()) { @@ -2301,9 +2723,7 @@ class TypePromotionHelper { /// \brief Utility function to determine if \p OpIdx should be promoted when /// promoting \p Inst. static bool shouldExtOperand(const Instruction *Inst, int OpIdx) { - if (isa<SelectInst>(Inst) && OpIdx == 0) - return false; - return true; + return !(isa<SelectInst>(Inst) && OpIdx == 0); } /// \brief Utility function to promote the operand of \p Ext when this @@ -2413,8 +2833,7 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, Value *OpndVal = Inst->getOperand(0); // Check if we can use this operand in the extension. - // If the type is larger than the result type of the extension, - // we cannot. + // If the type is larger than the result type of the extension, we cannot. if (!OpndVal->getType()->isIntegerTy() || OpndVal->getType()->getIntegerBitWidth() > ConsideredExtType->getIntegerBitWidth()) @@ -2433,18 +2852,16 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, // #1 get the type of the operand and check the kind of the extended bits. const Type *OpndType; InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd); - if (It != PromotedInsts.end() && It->second.IsSExt == IsSExt) - OpndType = It->second.Ty; + if (It != PromotedInsts.end() && It->second.getInt() == IsSExt) + OpndType = It->second.getPointer(); else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd))) OpndType = Opnd->getOperand(0)->getType(); else return false; - // #2 check that the truncate just drop extended bits. - if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth()) - return true; - - return false; + // #2 check that the truncate just drops extended bits. + return Inst->getType()->getIntegerBitWidth() >= + OpndType->getIntegerBitWidth(); } TypePromotionHelper::Action TypePromotionHelper::getAction( @@ -2553,7 +2970,7 @@ Value *TypePromotionHelper::promoteOperandForOther( } TPT.replaceAllUsesWith(ExtOpnd, Trunc); - // Restore the operand of Ext (which has been replace by the previous call + // Restore the operand of Ext (which has been replaced by the previous call // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext. 
TPT.setOperand(Ext, 0, ExtOpnd); } @@ -2631,8 +3048,7 @@ Value *TypePromotionHelper::promoteOperandForOther( return ExtOpnd; } -/// IsPromotionProfitable - Check whether or not promoting an instruction -/// to a wider type was profitable. +/// Check whether or not promoting an instruction to a wider type is profitable. /// \p NewCost gives the cost of extension instructions created by the /// promotion. /// \p OldCost gives the cost of extension instructions before the promotion @@ -2640,7 +3056,7 @@ Value *TypePromotionHelper::promoteOperandForOther( /// matched in the addressing mode the promotion. /// \p PromotedOperand is the value that has been promoted. /// \return True if the promotion is profitable, false otherwise. -bool AddressingModeMatcher::IsPromotionProfitable( +bool AddressingModeMatcher::isPromotionProfitable( unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const { DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n'); // The cost of the new extensions is greater than the cost of the @@ -2656,9 +3072,9 @@ bool AddressingModeMatcher::IsPromotionProfitable( return isPromotedInstructionLegal(TLI, DL, PromotedOperand); } -/// MatchOperationAddr - Given an instruction or constant expr, see if we can -/// fold the operation into the addressing mode. If so, update the addressing -/// mode and return true, otherwise return false without modifying AddrMode. +/// Given an instruction or constant expr, see if we can fold the operation +/// into the addressing mode. If so, update the addressing mode and return +/// true, otherwise return false without modifying AddrMode. /// If \p MovedAway is not NULL, it contains the information of whether or /// not AddrInst has to be folded into the addressing mode on success. /// If \p MovedAway == true, \p AddrInst will not be part of the addressing @@ -2667,7 +3083,7 @@ bool AddressingModeMatcher::IsPromotionProfitable( /// This state can happen when AddrInst is a sext, since it may be moved away. /// Therefore, AddrInst may not be valid when MovedAway is true and it must /// not be referenced anymore. -bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, +bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth, bool *MovedAway) { // Avoid exponential behavior on extremely deep expression trees. @@ -2680,13 +3096,13 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, switch (Opcode) { case Instruction::PtrToInt: // PtrToInt is always a noop, as we know that the int type is pointer sized. - return MatchAddr(AddrInst->getOperand(0), Depth); + return matchAddr(AddrInst->getOperand(0), Depth); case Instruction::IntToPtr: { auto AS = AddrInst->getType()->getPointerAddressSpace(); auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); // This inttoptr is a no-op if the integer type is pointer sized. if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy) - return MatchAddr(AddrInst->getOperand(0), Depth); + return matchAddr(AddrInst->getOperand(0), Depth); return false; } case Instruction::BitCast: @@ -2698,14 +3114,14 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, // and we don't want to mess around with them. Assume it knows what it // is doing. 
AddrInst->getOperand(0)->getType() != AddrInst->getType()) - return MatchAddr(AddrInst->getOperand(0), Depth); + return matchAddr(AddrInst->getOperand(0), Depth); return false; case Instruction::AddrSpaceCast: { unsigned SrcAS = AddrInst->getOperand(0)->getType()->getPointerAddressSpace(); unsigned DestAS = AddrInst->getType()->getPointerAddressSpace(); if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) - return MatchAddr(AddrInst->getOperand(0), Depth); + return matchAddr(AddrInst->getOperand(0), Depth); return false; } case Instruction::Add: { @@ -2719,8 +3135,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); - if (MatchAddr(AddrInst->getOperand(1), Depth+1) && - MatchAddr(AddrInst->getOperand(0), Depth+1)) + if (matchAddr(AddrInst->getOperand(1), Depth+1) && + matchAddr(AddrInst->getOperand(0), Depth+1)) return true; // Restore the old addr mode info. @@ -2729,8 +3145,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, TPT.rollback(LastKnownGood); // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. - if (MatchAddr(AddrInst->getOperand(0), Depth+1) && - MatchAddr(AddrInst->getOperand(1), Depth+1)) + if (matchAddr(AddrInst->getOperand(0), Depth+1) && + matchAddr(AddrInst->getOperand(1), Depth+1)) return true; // Otherwise we definitely can't merge the ADD in. @@ -2752,7 +3168,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, if (Opcode == Instruction::Shl) Scale = 1LL << Scale; - return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth); + return matchScaledValue(AddrInst->getOperand(0), Scale, Depth); } case Instruction::GetElementPtr: { // Scan the GEP. We check it if it contains constant offsets and at most @@ -2791,7 +3207,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, if (ConstantOffset == 0 || TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) { // Check to see if we can fold the base pointer in too. - if (MatchAddr(AddrInst->getOperand(0), Depth+1)) + if (matchAddr(AddrInst->getOperand(0), Depth+1)) return true; } AddrMode.BaseOffs -= ConstantOffset; @@ -2806,7 +3222,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, AddrMode.BaseOffs += ConstantOffset; // Match the base operand of the GEP. - if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) { + if (!matchAddr(AddrInst->getOperand(0), Depth+1)) { // If it couldn't be matched, just stuff the value in a register. if (AddrMode.HasBaseReg) { AddrMode = BackupAddrMode; @@ -2818,7 +3234,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, } // Match the remaining variable portion of the GEP. - if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, + if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, Depth)) { // If it couldn't be matched, try stuffing the base into a register // instead of matching it, and retrying the match of the scale. @@ -2829,7 +3245,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, AddrMode.HasBaseReg = true; AddrMode.BaseReg = AddrInst->getOperand(0); AddrMode.BaseOffs += ConstantOffset; - if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), + if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, Depth)) { // If even that didn't work, bail. 
AddrMode = BackupAddrMode; @@ -2879,12 +3295,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, ExtAddrMode BackupAddrMode = AddrMode; unsigned OldSize = AddrModeInsts.size(); - if (!MatchAddr(PromotedOperand, Depth) || - // The total of the new cost is equals to the cost of the created + if (!matchAddr(PromotedOperand, Depth) || + // The total of the new cost is equal to the cost of the created // instructions. - // The total of the old cost is equals to the cost of the extension plus + // The total of the old cost is equal to the cost of the extension plus // what we have saved in the addressing mode. - !IsPromotionProfitable(CreatedInstsCost, + !isPromotionProfitable(CreatedInstsCost, ExtCost + (AddrModeInsts.size() - OldSize), PromotedOperand)) { AddrMode = BackupAddrMode; @@ -2899,12 +3315,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, return false; } -/// MatchAddr - If we can, try to add the value of 'Addr' into the current -/// addressing mode. If Addr can't be added to AddrMode this returns false and -/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type -/// or intptr_t for the target. +/// If we can, try to add the value of 'Addr' into the current addressing mode. +/// If Addr can't be added to AddrMode this returns false and leaves AddrMode +/// unmodified. This assumes that Addr is either a pointer type or intptr_t +/// for the target. /// -bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { +bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) { // Start a transaction at this point that we will rollback if the matching // fails. TypePromotionTransaction::ConstRestorationPt LastKnownGood = @@ -2929,8 +3345,8 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { // Check to see if it is possible to fold this operation. bool MovedAway = false; - if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { - // This instruction may have been move away. If so, there is nothing + if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { + // This instruction may have been moved away. If so, there is nothing // to check here. if (MovedAway) return true; @@ -2938,7 +3354,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { // *profitable* to do so. We use a simple cost model to avoid increasing // register pressure too much. if (I->hasOneUse() || - IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { + isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { AddrModeInsts.push_back(I); return true; } @@ -2950,7 +3366,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { TPT.rollback(LastKnownGood); } } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) { - if (MatchOperationAddr(CE, CE->getOpcode(), Depth)) + if (matchOperationAddr(CE, CE->getOpcode(), Depth)) return true; TPT.rollback(LastKnownGood); } else if (isa<ConstantPointerNull>(Addr)) { @@ -2983,9 +3399,8 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { return false; } -/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified -/// inline asm call are due to memory operands. If so, return true, otherwise -/// return false. +/// Check to see if all uses of OpVal by the specified inline asm call are due +/// to memory operands. If so, return true, otherwise return false. 
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetMachine &TM) { const Function *F = CI->getParent()->getParent(); @@ -3011,8 +3426,8 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, return true; } -/// FindAllMemoryUses - Recursively walk all the uses of I until we find a -/// memory use. If we find an obviously non-foldable instruction, return true. +/// Recursively walk all the uses of I until we find a memory use. +/// If we find an obviously non-foldable instruction, return true. /// Add the ultimately found memory instructions to MemoryUses. static bool FindAllMemoryUses( Instruction *I, @@ -3059,11 +3474,11 @@ static bool FindAllMemoryUses( return false; } -/// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at -/// the use site that we're folding it into. If so, there is no cost to -/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values -/// that we know are live at the instruction already. -bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, +/// Return true if Val is already known to be live at the use site that we're +/// folding it into. If so, there is no cost to include it in the addressing +/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the +/// instruction already. +bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, Value *KnownLive2) { // If Val is either of the known-live values, we know it is live! if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) @@ -3085,11 +3500,11 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, return Val->isUsedInBasicBlock(MemoryInst->getParent()); } -/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing -/// mode of the machine to fold the specified instruction into a load or store -/// that ultimately uses it. However, the specified instruction has multiple -/// uses. Given this, it may actually increase register pressure to fold it -/// into the load. For example, consider this code: +/// It is possible for the addressing mode of the machine to fold the specified +/// instruction into a load or store that ultimately uses it. +/// However, the specified instruction has multiple uses. +/// Given this, it may actually increase register pressure to fold it +/// into the load. For example, consider this code: /// /// X = ... /// Y = X+1 @@ -3107,7 +3522,7 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, /// X was live across 'load Z' for other reasons, we actually *would* want to /// fold the addressing mode in the Z case. This would make Y die earlier. bool AddressingModeMatcher:: -IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, +isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) { if (IgnoreProfitability) return true; @@ -3124,9 +3539,9 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // If the BaseReg or ScaledReg was referenced by the previous addrmode, their // lifetime wasn't extended by adding this instruction. 
- if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) + if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) BaseReg = nullptr; - if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) + if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) ScaledReg = nullptr; // If folding this instruction (and it's subexprs) didn't extend any live @@ -3171,7 +3586,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, MemoryInst, Result, InsertedInsts, PromotedInsts, TPT); Matcher.IgnoreProfitability = true; - bool Success = Matcher.MatchAddr(Address, 0); + bool Success = Matcher.matchAddr(Address, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); // The match was to check the profitability, the changes made are not @@ -3192,7 +3607,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, } // end anonymous namespace -/// IsNonLocalValue - Return true if the specified values are defined in a +/// Return true if the specified values are defined in a /// different basic block than BB. static bool IsNonLocalValue(Value *V, BasicBlock *BB) { if (Instruction *I = dyn_cast<Instruction>(V)) @@ -3200,16 +3615,15 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { return false; } -/// OptimizeMemoryInst - Load and Store Instructions often have -/// addressing modes that can do significant amounts of computation. As such, -/// instruction selection will try to get the load or store to do as much -/// computation as possible for the program. The problem is that isel can only -/// see within a single block. As such, we sink as much legal addressing mode -/// stuff into the block as possible. +/// Load and Store Instructions often have addressing modes that can do +/// significant amounts of computation. As such, instruction selection will try +/// to get the load or store to do as much computation as possible for the +/// program. The problem is that isel can only see within a single block. As +/// such, we sink as much legal addressing mode work into the block as possible. /// /// This method is used to optimize both load/store and inline asms with memory /// operands. -bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, +bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy, unsigned AddrSpace) { Value *Repl = Addr; @@ -3530,12 +3944,12 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (Repl->use_empty()) { // This can cause recursive deletion, which can invalidate our iterator. // Use a WeakVH to hold onto it in case this happens. - WeakVH IterHandle(CurInstIterator); + WeakVH IterHandle(&*CurInstIterator); BasicBlock *BB = CurInstIterator->getParent(); RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); - if (IterHandle != CurInstIterator) { + if (IterHandle != CurInstIterator.getNodePtrUnchecked()) { // If the iterator instruction was recursively deleted, start over at the // start of the block. CurInstIterator = BB->begin(); @@ -3546,10 +3960,9 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, return true; } -/// OptimizeInlineAsmInst - If there are any memory operands, use -/// OptimizeMemoryInst to sink their address computing into the block when -/// possible / profitable. 
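The profitability check above hinges on one idea: folding a multi-use instruction into a load's addressing mode is only worthwhile when it does not keep extra values live across the load. A small standalone model of that liveness test is sketched below; it omits the FindAllMemoryUses counting that the real pass also performs, and every name in it is illustrative rather than an LLVM API.

#include <set>
#include <string>
#include <iostream>

struct AddrModeSketch {
  std::string BaseReg, ScaledReg;   // values the folded form would need live
};

bool valueAlreadyLive(const std::string &V, const std::set<std::string> &LiveHere) {
  return V.empty() || LiveHere.count(V) != 0;     // empty means "not needed"
}

// Fold only if every value the new addressing mode needs is live at the
// memory instruction anyway, so no live range gets extended.
bool profitableToFold(const AddrModeSketch &AM,
                      const std::set<std::string> &LiveHere) {
  return valueAlreadyLive(AM.BaseReg, LiveHere) &&
         valueAlreadyLive(AM.ScaledReg, LiveHere);
}

int main() {
  std::set<std::string> liveAtLoad = {"X"};
  std::cout << profitableToFold({"X", ""}, liveAtLoad) << "\n";  // 1: X already live
  std::cout << profitableToFold({"Y", ""}, liveAtLoad) << "\n";  // 0: would extend Y
}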
-bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { +/// If there are any memory operands, use OptimizeMemoryInst to sink their +/// address computing into the block when possible / profitable. +bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { bool MadeChange = false; const TargetRegisterInfo *TRI = @@ -3566,7 +3979,7 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.isIndirect) { Value *OpVal = CS->getArgOperand(ArgNo++); - MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u); + MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u); } else if (OpInfo.Type == InlineAsm::isInput) ArgNo++; } @@ -3646,7 +4059,7 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) { /// %add = add nuw i64 %zext, 4 /// \encode /// Thanks to the promotion, we can match zext(load i32*) to i64. -bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, +bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, const SmallVectorImpl<Instruction *> &Exts, unsigned CreatedInstsCost = 0) { @@ -3696,7 +4109,7 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, } // The promotion is profitable. // Check if it exposes an ext(load). - (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost); + (void)extLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost); if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost || // If we have created a new extension, i.e., now we have two // extensions. We must make sure one of them is merged with @@ -3713,13 +4126,13 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, return false; } -/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same -/// basic block as the load, unless conditions are unfavorable. This allows -/// SelectionDAG to fold the extend into the load. +/// Move a zext or sext fed by a load into the same basic block as the load, +/// unless conditions are unfavorable. This allows SelectionDAG to fold the +/// extend into the load. /// \p I[in/out] the extension may be modified during the process if some /// promotions apply. /// -bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) { +bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) { // Try to promote a chain of computation if it allows to form // an extended load. TypePromotionTransaction TPT; @@ -3730,7 +4143,7 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) { // Look for a load being extended. 
LoadInst *LI = nullptr; Instruction *OldExt = I; - bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts); + bool HasPromoted = extLdPromotion(TPT, LI, I, Exts); if (!LI || !I) { assert(!HasPromoted && !LI && "If we did not match any load instruction " "the code must remain the same"); @@ -3780,7 +4193,7 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) { return true; } -bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { +bool CodeGenPrepare::optimizeExtUses(Instruction *I) { BasicBlock *DefBB = I->getParent(); // If the result of a {s|z}ext and its source are both live out, rewrite all @@ -3838,7 +4251,8 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { if (!InsertedTrunc) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); - InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt); + assert(InsertPt != UserBB->end()); + InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt); InsertedInsts.insert(InsertedTrunc); } @@ -3851,9 +4265,202 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { return MadeChange; } -/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be -/// turned into an explicit branch. -static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { +// Find loads whose uses only use some of the loaded value's bits. Add an "and" +// just after the load if the target can fold this into one extload instruction, +// with the hope of eliminating some of the other later "and" instructions using +// the loaded value. "and"s that are made trivially redundant by the insertion +// of the new "and" are removed by this function, while others (e.g. those whose +// path from the load goes through a phi) are left for isel to potentially +// remove. +// +// For example: +// +// b0: +// x = load i32 +// ... +// b1: +// y = and x, 0xff +// z = use y +// +// becomes: +// +// b0: +// x = load i32 +// x' = and x, 0xff +// ... +// b1: +// z = use x' +// +// whereas: +// +// b0: +// x1 = load i32 +// ... +// b1: +// x2 = load i32 +// ... +// b2: +// x = phi x1, x2 +// y = and x, 0xff +// +// becomes (after a call to optimizeLoadExt for each load): +// +// b0: +// x1 = load i32 +// x1' = and x1, 0xff +// ... +// b1: +// x2 = load i32 +// x2' = and x2, 0xff +// ... +// b2: +// x = phi x1', x2' +// y = and x, 0xff +// + +bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { + + if (!Load->isSimple() || + !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy())) + return false; + + // Skip loads we've already transformed or have no reason to transform. + if (Load->hasOneUse()) { + User *LoadUser = *Load->user_begin(); + if (cast<Instruction>(LoadUser)->getParent() == Load->getParent() && + !dyn_cast<PHINode>(LoadUser)) + return false; + } + + // Look at all uses of Load, looking through phis, to determine how many bits + // of the loaded value are needed. + SmallVector<Instruction *, 8> WorkList; + SmallPtrSet<Instruction *, 16> Visited; + SmallVector<Instruction *, 8> AndsToMaybeRemove; + for (auto *U : Load->users()) + WorkList.push_back(cast<Instruction>(U)); + + EVT LoadResultVT = TLI->getValueType(*DL, Load->getType()); + unsigned BitWidth = LoadResultVT.getSizeInBits(); + APInt DemandBits(BitWidth, 0); + APInt WidestAndBits(BitWidth, 0); + + while (!WorkList.empty()) { + Instruction *I = WorkList.back(); + WorkList.pop_back(); + + // Break use-def graph loops. + if (!Visited.insert(I).second) + continue; + + // For a PHI node, push all of its users. 
+ if (auto *Phi = dyn_cast<PHINode>(I)) { + for (auto *U : Phi->users()) + WorkList.push_back(cast<Instruction>(U)); + continue; + } + + switch (I->getOpcode()) { + case llvm::Instruction::And: { + auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1)); + if (!AndC) + return false; + APInt AndBits = AndC->getValue(); + DemandBits |= AndBits; + // Keep track of the widest and mask we see. + if (AndBits.ugt(WidestAndBits)) + WidestAndBits = AndBits; + if (AndBits == WidestAndBits && I->getOperand(0) == Load) + AndsToMaybeRemove.push_back(I); + break; + } + + case llvm::Instruction::Shl: { + auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1)); + if (!ShlC) + return false; + uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1); + auto ShlDemandBits = APInt::getAllOnesValue(BitWidth).lshr(ShiftAmt); + DemandBits |= ShlDemandBits; + break; + } + + case llvm::Instruction::Trunc: { + EVT TruncVT = TLI->getValueType(*DL, I->getType()); + unsigned TruncBitWidth = TruncVT.getSizeInBits(); + auto TruncBits = APInt::getAllOnesValue(TruncBitWidth).zext(BitWidth); + DemandBits |= TruncBits; + break; + } + + default: + return false; + } + } + + uint32_t ActiveBits = DemandBits.getActiveBits(); + // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the + // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example, + // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but + // (and (load x) 1) is not matched as a single instruction, rather as a LDR + // followed by an AND. + // TODO: Look into removing this restriction by fixing backends to either + // return false for isLoadExtLegal for i1 or have them select this pattern to + // a single instruction. + // + // Also avoid hoisting if we didn't see any ands with the exact DemandBits + // mask, since these are the only ands that will be removed by isel. + if (ActiveBits <= 1 || !APIntOps::isMask(ActiveBits, DemandBits) || + WidestAndBits != DemandBits) + return false; + + LLVMContext &Ctx = Load->getType()->getContext(); + Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits); + EVT TruncVT = TLI->getValueType(*DL, TruncTy); + + // Reject cases that won't be matched as extloads. + if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() || + !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT)) + return false; + + IRBuilder<> Builder(Load->getNextNode()); + auto *NewAnd = dyn_cast<Instruction>( + Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits))); + + // Replace all uses of load with new and (except for the use of load in the + // new and itself). + Load->replaceAllUsesWith(NewAnd); + NewAnd->setOperand(0, Load); + + // Remove any and instructions that are now redundant. + for (auto *And : AndsToMaybeRemove) + // Check that the and mask is the same as the one we decided to put on the + // new and. + if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) { + And->replaceAllUsesWith(NewAnd); + if (&*CurInstIterator == And) + CurInstIterator = std::next(And->getIterator()); + And->eraseFromParent(); + ++NumAndUses; + } + + ++NumAndsAdded; + return true; +} + +/// Check if V (an operand of a select instruction) is an expensive instruction +/// that is only used once. +static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { + auto *I = dyn_cast<Instruction>(V); + // If it's safe to speculatively execute, then it should not have side + // effects; therefore, it's safe to sink and possibly *not* execute. 
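The demanded-bits walk in the new optimizeLoadExt hunk can be summarized with plain integers: each user contributes a mask of the loaded bits it can observe, and the load is only narrowed when the union of those masks is a low-bit mask that some existing "and" already uses. The sketch below models that with uint64_t instead of APInt and leaves out the extload-legality checks; all names are illustrative.

#include <cstdint>
#include <vector>
#include <iostream>

struct Use { enum { And, Shl, Trunc } Kind; uint64_t Operand; };

bool computeDemand(const std::vector<Use> &Uses, unsigned BitWidth,
                   uint64_t &DemandBits) {
  uint64_t All = (BitWidth == 64) ? ~0ULL : ((1ULL << BitWidth) - 1);
  uint64_t WidestAnd = 0;
  DemandBits = 0;
  for (const Use &U : Uses) {
    switch (U.Kind) {
    case Use::And:                       // Operand is the constant mask
      DemandBits |= U.Operand;
      if (U.Operand > WidestAnd) WidestAnd = U.Operand;
      break;
    case Use::Shl:                       // Operand is the shift amount;
      DemandBits |= All >> U.Operand;    // bits shifted out the top are unseen
      break;
    case Use::Trunc:                     // Operand is the truncated width
      DemandBits |= (U.Operand == 64) ? ~0ULL : ((1ULL << U.Operand) - 1);
      break;
    }
  }
  // Narrow only if the demand is a contiguous low-bit mask wider than one bit
  // and some 'and' already uses exactly that mask (so it becomes removable).
  bool IsLowMask = (DemandBits & (DemandBits + 1)) == 0;
  return DemandBits > 1 && IsLowMask && WidestAnd == DemandBits;
}

int main() {
  uint64_t Demand = 0;
  std::vector<Use> Uses = {{Use::And, 0xFF}, {Use::Trunc, 8}};
  std::cout << computeDemand(Uses, 32, Demand) << " mask=0x"
            << std::hex << Demand << "\n";        // 1 mask=0xff
}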
+ return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) && + TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive; +} + +/// Returns true if a SelectInst should be turned into an explicit branch. +static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, + SelectInst *SI) { // FIXME: This should use the same heuristics as IfConversion to determine // whether a select is better represented as a branch. This requires that // branch probability metadata is preserved for the select, which is not the @@ -3861,28 +4468,36 @@ static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); - // If the branch is predicted right, an out of order CPU can avoid blocking on - // the compare. Emit cmovs on compares with a memory operand as branches to - // avoid stalls on the load from memory. If the compare has more than one use - // there's probably another cmov or setcc around so it's not worth emitting a - // branch. - if (!Cmp) + // If a branch is predictable, an out-of-order CPU can avoid blocking on its + // comparison condition. If the compare has more than one use, there's + // probably another cmov or setcc around, so it's not worth emitting a branch. + if (!Cmp || !Cmp->hasOneUse()) return false; Value *CmpOp0 = Cmp->getOperand(0); Value *CmpOp1 = Cmp->getOperand(1); - // We check that the memory operand has one use to avoid uses of the loaded - // value directly after the compare, making branches unprofitable. - return Cmp->hasOneUse() && - ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) || - (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse())); + // Emit "cmov on compare with a memory operand" as a branch to avoid stalls + // on a load from memory. But if the load is used more than once, do not + // change the select to a branch because the load is probably needed + // regardless of whether the branch is taken or not. + if ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) || + (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse())) + return true; + + // If either operand of the select is expensive and only needed on one side + // of the select, we should form a branch. + if (sinkSelectOperand(TTI, SI->getTrueValue()) || + sinkSelectOperand(TTI, SI->getFalseValue())) + return true; + + return false; } /// If we have a SelectInst that will likely profit from branch prediction, /// turn it into a branch. -bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { +bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); // Can we convert the 'select' to CF ? @@ -3902,34 +4517,97 @@ bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { // We have efficient codegen support for the select instruction. // Check if it is profitable to keep this 'select'. 
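The two signals the revised heuristic looks for can be shown in isolation: a compare against a freshly loaded, single-use value (branching hides the load latency that a cmov would stall on), or an expensive single-use operand that can be sunk so it only executes on the taken side. Below is a standalone sketch of that decision; the cost threshold and field names are made up for the example and the single-use requirement on the compare itself is omitted.

#include <iostream>

struct ValueInfo {
  bool SingleUse = false;
  bool IsLoad = false;
  int  Cost = 1;                   // stand-in for a target cost query
};

constexpr int ExpensiveCost = 4;   // illustrative threshold

bool worthSinking(const ValueInfo &V) {
  return V.SingleUse && V.Cost >= ExpensiveCost;  // only pay for it on one path
}

bool formBranchFromSelect(const ValueInfo &CmpOp0, const ValueInfo &CmpOp1,
                          const ValueInfo &TrueVal, const ValueInfo &FalseVal) {
  // Compare against a single-use load: branch instead of stalling a cmov.
  if ((CmpOp0.IsLoad && CmpOp0.SingleUse) || (CmpOp1.IsLoad && CmpOp1.SingleUse))
    return true;
  // An expensive operand only needed on one side: branch and sink it.
  return worthSinking(TrueVal) || worthSinking(FalseVal);
}

int main() {
  ValueInfo load{true, true, 1}, cheap{}, div{true, false, 10};
  std::cout << formBranchFromSelect(load, cheap, cheap, cheap) << "\n";  // 1
  std::cout << formBranchFromSelect(cheap, cheap, div, cheap) << "\n";   // 1
  std::cout << formBranchFromSelect(cheap, cheap, cheap, cheap) << "\n"; // 0
}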
if (!TLI->isPredictableSelectExpensive() || - !isFormingBranchFromSelectProfitable(SI)) + !isFormingBranchFromSelectProfitable(TTI, SI)) return false; } ModifiedDT = true; + // Transform a sequence like this: + // start: + // %cmp = cmp uge i32 %a, %b + // %sel = select i1 %cmp, i32 %c, i32 %d + // + // Into: + // start: + // %cmp = cmp uge i32 %a, %b + // br i1 %cmp, label %select.true, label %select.false + // select.true: + // br label %select.end + // select.false: + // br label %select.end + // select.end: + // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] + // + // In addition, we may sink instructions that produce %c or %d from + // the entry block into the destination(s) of the new branch. + // If the true or false blocks do not contain a sunken instruction, that + // block and its branch may be optimized away. In that case, one side of the + // first branch will point directly to select.end, and the corresponding PHI + // predecessor block will be the start block. + // First, we split the block containing the select into 2 blocks. BasicBlock *StartBlock = SI->getParent(); BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI)); - BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); + BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); - // Create a new block serving as the landing pad for the branch. - BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid", - NextBlock->getParent(), NextBlock); - - // Move the unconditional branch from the block with the select in it into our - // landing pad block. + // Delete the unconditional branch that was just created by the split. StartBlock->getTerminator()->eraseFromParent(); - BranchInst::Create(NextBlock, SmallBlock); + + // These are the new basic blocks for the conditional branch. + // At least one will become an actual new basic block. + BasicBlock *TrueBlock = nullptr; + BasicBlock *FalseBlock = nullptr; + + // Sink expensive instructions into the conditional blocks to avoid executing + // them speculatively. + if (sinkSelectOperand(TTI, SI->getTrueValue())) { + TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", + EndBlock->getParent(), EndBlock); + auto *TrueBranch = BranchInst::Create(EndBlock, TrueBlock); + auto *TrueInst = cast<Instruction>(SI->getTrueValue()); + TrueInst->moveBefore(TrueBranch); + } + if (sinkSelectOperand(TTI, SI->getFalseValue())) { + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", + EndBlock->getParent(), EndBlock); + auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + auto *FalseInst = cast<Instruction>(SI->getFalseValue()); + FalseInst->moveBefore(FalseBranch); + } + + // If there was nothing to sink, then arbitrarily choose the 'false' side + // for a new input value to the PHI. + if (TrueBlock == FalseBlock) { + assert(TrueBlock == nullptr && + "Unexpected basic block transform while optimizing select"); + + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", + EndBlock->getParent(), EndBlock); + BranchInst::Create(EndBlock, FalseBlock); + } // Insert the real conditional branch based on the original condition. - BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI); + // If we did not create a new block for one of the 'true' or 'false' paths + // of the condition, it means that side of the branch goes to the end block + // directly and the path originates from the start block from the point of + // view of the new PHI. 
+ if (TrueBlock == nullptr) { + BranchInst::Create(EndBlock, FalseBlock, SI->getCondition(), SI); + TrueBlock = StartBlock; + } else if (FalseBlock == nullptr) { + BranchInst::Create(TrueBlock, EndBlock, SI->getCondition(), SI); + FalseBlock = StartBlock; + } else { + BranchInst::Create(TrueBlock, FalseBlock, SI->getCondition(), SI); + } // The select itself is replaced with a PHI Node. - PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin()); + PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); PN->takeName(SI); - PN->addIncoming(SI->getTrueValue(), StartBlock); - PN->addIncoming(SI->getFalseValue(), SmallBlock); + PN->addIncoming(SI->getTrueValue(), TrueBlock); + PN->addIncoming(SI->getFalseValue(), FalseBlock); + SI->replaceAllUsesWith(PN); SI->eraseFromParent(); @@ -3955,7 +4633,7 @@ static bool isBroadcastShuffle(ShuffleVectorInst *SVI) { /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases /// it's often worth sinking a shufflevector splat down to its use so that /// codegen can spot all lanes are identical. -bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { +bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { BasicBlock *DefBB = SVI->getParent(); // Only do this xform if variable vector shifts are particularly expensive. @@ -3987,9 +4665,10 @@ bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { if (!InsertedShuffle) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); - InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0), - SVI->getOperand(1), - SVI->getOperand(2), "", InsertPt); + assert(InsertPt != UserBB->end()); + InsertedShuffle = + new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1), + SVI->getOperand(2), "", &*InsertPt); } UI->replaceUsesOfWith(SVI, InsertedShuffle); @@ -4005,6 +4684,49 @@ bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { return MadeChange; } +bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { + if (!TLI || !DL) + return false; + + Value *Cond = SI->getCondition(); + Type *OldType = Cond->getType(); + LLVMContext &Context = Cond->getContext(); + MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType)); + unsigned RegWidth = RegType.getSizeInBits(); + + if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth()) + return false; + + // If the register width is greater than the type width, expand the condition + // of the switch instruction and each case constant to the width of the + // register. By widening the type of the switch condition, subsequent + // comparisons (for case comparisons) will not need to be extended to the + // preferred register width, so we will potentially eliminate N-1 extends, + // where N is the number of cases in the switch. + auto *NewType = Type::getIntNTy(Context, RegWidth); + + // Zero-extend the switch condition and case constants unless the switch + // condition is a function argument that is already being sign-extended. + // In that case, we can avoid an unnecessary mask/extension by sign-extending + // everything instead. 
+ Instruction::CastOps ExtType = Instruction::ZExt; + if (auto *Arg = dyn_cast<Argument>(Cond)) + if (Arg->hasSExtAttr()) + ExtType = Instruction::SExt; + + auto *ExtInst = CastInst::Create(ExtType, Cond, NewType); + ExtInst->insertBefore(SI); + SI->setCondition(ExtInst); + for (SwitchInst::CaseIt Case : SI->cases()) { + APInt NarrowConst = Case.getCaseValue()->getValue(); + APInt WideConst = (ExtType == Instruction::ZExt) ? + NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth); + Case.setValue(ConstantInt::get(Context, WideConst)); + } + + return true; +} + namespace { /// \brief Helper class to promote a scalar operation to a vector one. /// This class is used to move downward extractelement transition. @@ -4138,7 +4860,7 @@ class VectorPromoteHelper { /// \brief Generate a constant vector with \p Val with the same /// number of elements as the transition. /// \p UseSplat defines whether or not \p Val should be replicated - /// accross the whole vector. + /// across the whole vector. /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>, /// otherwise we generate a vector with as many undef as possible: /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only @@ -4320,7 +5042,7 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { /// Some targets can do store(extractelement) with one instruction. /// Try to push the extractelement towards the stores when the target /// has this feature and this is profitable. -bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) { +bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { unsigned CombineCost = UINT_MAX; if (DisableStoreExtract || !TLI || (!StressStoreExtract && @@ -4372,7 +5094,7 @@ bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) { return false; } -bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { +bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { // Bail out if we inserted the instruction to prevent optimizations from // stepping on each other's toes. 
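The switch-widening transform added above boils down to arithmetic on the condition and case constants: pay for one extend of the condition up front, fold the widening of every case constant at compile time, and pick sign- or zero-extension based on how the incoming value is already extended. A standalone model with plain integers, not IR values, is sketched here; the helper names are invented for the example.

#include <cstdint>
#include <vector>
#include <iostream>

struct CaseSketch { uint64_t NarrowConst; };

uint64_t maskOf(unsigned Bits) { return Bits >= 64 ? ~0ULL : ((1ULL << Bits) - 1); }

// Widen V from FromBits to ToBits, either sign- or zero-extending.
uint64_t widen(uint64_t V, unsigned FromBits, unsigned ToBits, bool Signed) {
  V &= maskOf(FromBits);
  if (Signed && ((V >> (FromBits - 1)) & 1))
    V |= ~0ULL << FromBits;                        // replicate the sign bit
  return V & maskOf(ToBits);
}

void widenSwitch(uint64_t &Cond, std::vector<CaseSketch> &Cases,
                 unsigned CondBits, unsigned RegBits, bool CondIsSExt) {
  if (RegBits <= CondBits)
    return;                                        // nothing to gain
  Cond = widen(Cond, CondBits, RegBits, CondIsSExt);      // one extend up front
  for (CaseSketch &C : Cases)                      // constants widen for free
    C.NarrowConst = widen(C.NarrowConst, CondBits, RegBits, CondIsSExt);
}

int main() {
  uint64_t Cond = 0xFE;                            // an i8 holding -2
  std::vector<CaseSketch> Cases = {{0xFE}, {0x01}};
  widenSwitch(Cond, Cases, /*CondBits=*/8, /*RegBits=*/32, /*CondIsSExt=*/true);
  std::cout << std::hex << Cond << " " << Cases[0].NarrowConst << " "
            << Cases[1].NarrowConst << "\n";       // fffffffe fffffffe 1
}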
if (InsertedInsts.count(I)) @@ -4413,8 +5135,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { TargetLowering::TypeExpandInteger) { return SinkCast(CI); } else { - bool MadeChange = MoveExtToFormExtLoad(I); - return MadeChange | OptimizeExtUses(I); + bool MadeChange = moveExtToFormExtLoad(I); + return MadeChange | optimizeExtUses(I); } } return false; @@ -4425,17 +5147,21 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { return OptimizeCmpExpression(CI); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + stripInvariantGroupMetadata(*LI); if (TLI) { + bool Modified = optimizeLoadExt(LI); unsigned AS = LI->getPointerAddressSpace(); - return OptimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS); + Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS); + return Modified; } return false; } if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + stripInvariantGroupMetadata(*SI); if (TLI) { unsigned AS = SI->getPointerAddressSpace(); - return OptimizeMemoryInst(I, SI->getOperand(1), + return optimizeMemoryInst(I, SI->getOperand(1), SI->getOperand(0)->getType(), AS); } return false; @@ -4460,23 +5186,26 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { GEPI->replaceAllUsesWith(NC); GEPI->eraseFromParent(); ++NumGEPsElim; - OptimizeInst(NC, ModifiedDT); + optimizeInst(NC, ModifiedDT); return true; } return false; } if (CallInst *CI = dyn_cast<CallInst>(I)) - return OptimizeCallInst(CI, ModifiedDT); + return optimizeCallInst(CI, ModifiedDT); if (SelectInst *SI = dyn_cast<SelectInst>(I)) - return OptimizeSelectInst(SI); + return optimizeSelectInst(SI); if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) - return OptimizeShuffleVectorInst(SVI); + return optimizeShuffleVectorInst(SVI); + + if (auto *Switch = dyn_cast<SwitchInst>(I)) + return optimizeSwitchInst(Switch); if (isa<ExtractElementInst>(I)) - return OptimizeExtractElementInst(I); + return optimizeExtractElementInst(I); return false; } @@ -4484,17 +5213,17 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { // In this pass we look for GEP and cast instructions that are used // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. -bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) { +bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) { SunkAddrs.clear(); bool MadeChange = false; CurInstIterator = BB.begin(); while (CurInstIterator != BB.end()) { - MadeChange |= OptimizeInst(CurInstIterator++, ModifiedDT); + MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT); if (ModifiedDT) return true; } - MadeChange |= DupRetToEnableTailCallOpts(&BB); + MadeChange |= dupRetToEnableTailCallOpts(&BB); return MadeChange; } @@ -4502,12 +5231,12 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) { // llvm.dbg.value is far away from the value then iSel may not be able // handle it properly. iSel will drop llvm.dbg.value if it can not // find a node corresponding to the value. -bool CodeGenPrepare::PlaceDbgValues(Function &F) { +bool CodeGenPrepare::placeDbgValues(Function &F) { bool MadeChange = false; for (BasicBlock &BB : F) { Instruction *PrevNonDbgInst = nullptr; for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { - Instruction *Insn = BI++; + Instruction *Insn = &*BI++; DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); // Leave dbg.values that refer to an alloca alone. 
These // instrinsics describe the address of a variable (= the alloca) @@ -4521,10 +5250,14 @@ bool CodeGenPrepare::PlaceDbgValues(Function &F) { Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()); if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) { + // If VI is a phi in a block with an EHPad terminator, we can't insert + // after it. + if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad()) + continue; DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI); DVI->removeFromParent(); if (isa<PHINode>(VI)) - DVI->insertBefore(VI->getParent()->getFirstInsertionPt()); + DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt()); else DVI->insertAfter(VI); MadeChange = true; @@ -4548,7 +5281,7 @@ bool CodeGenPrepare::sinkAndCmp(Function &F) { return false; bool MadeChange = false; for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { - BasicBlock *BB = I++; + BasicBlock *BB = &*I++; // Does this BB end with the following? // %andVal = and %val, #single-bit-set @@ -4671,6 +5404,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB))) continue; + auto *Br1 = cast<BranchInst>(BB.getTerminator()); + if (Br1->getMetadata(LLVMContext::MD_unpredictable)) + continue; + unsigned Opc; Value *Cond1, *Cond2; if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)), @@ -4697,7 +5434,6 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { // Update original basic block by using the first condition directly by the // branch instruction and removing the no longer needed and/or instruction. - auto *Br1 = cast<BranchInst>(BB.getTerminator()); Br1->setCondition(Cond1); LogicOp->eraseFromParent(); @@ -4828,3 +5564,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { } return MadeChange; } + +void CodeGenPrepare::stripInvariantGroupMetadata(Instruction &I) { + if (auto *InvariantMD = I.getMetadata(LLVMContext::MD_invariant_group)) + I.dropUnknownNonDebugMetadata(InvariantMD->getMetadataID()); +} diff --git a/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp index 28c97ba71bd9..ff7c0d5dc0ac 100644 --- a/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp +++ b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp @@ -38,9 +38,9 @@ public: UsesMetadata = false; CustomRoots = false; } - Optional<bool> isGCManagedPointer(const Value *V) const override { + Optional<bool> isGCManagedPointer(const Type *Ty) const override { // Method is only valid on pointer typed values. - PointerType *PT = cast<PointerType>(V->getType()); + const PointerType *PT = cast<PointerType>(Ty); // We pick addrspace(1) as our GC managed heap. return (1 == PT->getAddressSpace()); } diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index dba280fd5aa2..c924ba30c8a1 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -52,14 +52,13 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // Clear "do not change" set. KeepRegs.reset(); - bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn()); + bool IsReturnBlock = BB->isReturnBlock(); // Examine the live-in regs of all successors. 
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { - for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { + for (const auto &LI : (*SI)->liveins()) { + for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) { unsigned Reg = *AI; Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); KillIndices[Reg] = BBSize; diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp index 0a188c0935ad..af6b6a392b75 100644 --- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -31,10 +31,39 @@ #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; -DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2], +// -------------------------------------------------------------------- +// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp + +namespace { + DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { + return (Inp << DFA_MAX_RESOURCES) | FuncUnits; + } + + /// Return the DFAInput for an instruction class input vector. + /// This function is used in both DFAPacketizer.cpp and in + /// DFAPacketizerEmitter.cpp. + DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) { + DFAInput InsnInput = 0; + assert ((InsnClass.size() <= DFA_MAX_RESTERMS) && + "Exceeded maximum number of DFA terms"); + for (auto U : InsnClass) + InsnInput = addDFAFuncUnits(InsnInput, U); + return InsnInput; + } +} +// -------------------------------------------------------------------- + +DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, + const DFAStateInput (*SIT)[2], const unsigned *SET): InstrItins(I), CurrentState(0), DFAStateInputTable(SIT), - DFAStateEntryTable(SET) {} + DFAStateEntryTable(SET) { + // Make sure DFA types are large enough for the number of terms & resources. + assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAInput)) + && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); + assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)) + && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); +} // @@ -60,26 +89,42 @@ void DFAPacketizer::ReadTable(unsigned int state) { DFAStateInputTable[i][1]; } +// +// getInsnInput - Return the DFAInput for an instruction class. +// +DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { + // Note: this logic must match that in DFAPacketizerDefs.h for input vectors. + DFAInput InsnInput = 0; + unsigned i = 0; + for (const InstrStage *IS = InstrItins->beginStage(InsnClass), + *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS, ++i) { + InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits()); + assert ((i < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs"); + } + return InsnInput; +} + +// getInsnInput - Return the DFAInput for an instruction class input vector. +DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) { + return getDFAInsnInput(InsnClass); +} // canReserveResources - Check if the resources occupied by a MCInstrDesc // are available in the current state. 
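The DFAInput packing introduced in the DFAPacketizer hunk is just bit-field concatenation: each stage's functional-unit mask gets its own field inside one integer, so a (state, packed input) pair can key the transition table. The constants below are illustrative placeholders rather than the generated target values.

#include <cstdint>
#include <vector>
#include <cassert>
#include <iostream>

using DFAInput = uint64_t;
constexpr unsigned DFA_MAX_RESOURCES = 16;  // bits per functional-unit term (example value)
constexpr unsigned DFA_MAX_RESTERMS  = 4;   // max stages packed per instruction (example value)

DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
  return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
}

DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
  assert(InsnClass.size() <= DFA_MAX_RESTERMS && "too many DFA terms");
  DFAInput InsnInput = 0;
  for (unsigned U : InsnClass)
    InsnInput = addDFAFuncUnits(InsnInput, U);
  return InsnInput;
}

int main() {
  // Two stages: one needing units {0,1} (mask 0x3), one needing unit 3 (mask 0x8).
  std::cout << std::hex << getDFAInsnInput({0x3, 0x8}) << "\n";   // 30008
}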
bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); - const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); - unsigned FuncUnits = IS->getUnits(); - UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + DFAInput InsnInput = getInsnInput(InsnClass); + UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); ReadTable(CurrentState); return (CachedTable.count(StateTrans) != 0); } - // reserveResources - Reserve the resources occupied by a MCInstrDesc and // change the current state to reflect that change. void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); - const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); - unsigned FuncUnits = IS->getUnits(); - UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + DFAInput InsnInput = getInsnInput(InsnClass); + UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); ReadTable(CurrentState); assert(CachedTable.count(StateTrans) != 0); CurrentState = CachedTable[StateTrans]; @@ -104,32 +149,35 @@ namespace llvm { // DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides // Schedule method to build the dependence graph. class DefaultVLIWScheduler : public ScheduleDAGInstrs { +private: + AliasAnalysis *AA; public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, - bool IsPostRA); + AliasAnalysis *AA); // Schedule - Actual scheduling work. void schedule() override; }; } DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, - MachineLoopInfo &MLI, bool IsPostRA) - : ScheduleDAGInstrs(MF, &MLI, IsPostRA) { + MachineLoopInfo &MLI, + AliasAnalysis *AA) + : ScheduleDAGInstrs(MF, &MLI), AA(AA) { CanHandleTerminators = true; } void DefaultVLIWScheduler::schedule() { // Build the scheduling graph. - buildSchedGraph(nullptr); + buildSchedGraph(AA); } // VLIWPacketizerList Ctor VLIWPacketizerList::VLIWPacketizerList(MachineFunction &MF, - MachineLoopInfo &MLI, bool IsPostRA) - : MF(MF) { + MachineLoopInfo &MLI, AliasAnalysis *AA) + : MF(MF), AA(AA) { TII = MF.getSubtarget().getInstrInfo(); ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget()); - VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, IsPostRA); + VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, AA); } // VLIWPacketizerList Dtor @@ -147,7 +195,7 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, MachineInstr *MI) { if (CurrentPacketMIs.size() > 1) { MachineInstr *MIFirst = CurrentPacketMIs.front(); - finalizeBundle(*MBB, MIFirst, MI); + finalizeBundle(*MBB, MIFirst->getIterator(), MI->getIterator()); } CurrentPacketMIs.clear(); ResourceTracker->clearResources(); @@ -191,7 +239,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, // Ask DFA if machine resource is available for MI. bool ResourceAvail = ResourceTracker->canReserveResources(MI); - if (ResourceAvail) { + if (ResourceAvail && shouldAddToPacket(MI)) { // Dependency check for MI with instructions in CurrentPacketMIs. for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end(); VI != VE; ++VI) { @@ -210,7 +258,8 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, } // !isLegalToPacketizeTogether. } // For all instructions in CurrentPacketMIs. } else { - // End the packet if resource is not available. + // End the packet if resource is not available, or if the instruction + // shoud not be added to the current packet. 
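Around the hunk above, packet formation is a greedy loop: an instruction joins the current packet only if the DFA still has resources for it and the new shouldAddToPacket hook agrees; otherwise the packet is closed and a fresh one starts. The class below is a toy stand-in for that loop, not the VLIWPacketizerList API, and the resource masks are invented.

#include <vector>
#include <iostream>

struct Insn { unsigned Resources; bool Solo; };   // Solo: refuse to packetize

class ToyPacketizer {
  unsigned UsedResources = 0;
  std::vector<Insn> CurrentPacket;
public:
  bool canReserveResources(const Insn &I) const {
    return (UsedResources & I.Resources) == 0;    // no functional-unit clash
  }
  bool shouldAddToPacket(const Insn &I) const { return !I.Solo; }
  void endPacket() {
    std::cout << "packet of " << CurrentPacket.size() << " insns\n";
    CurrentPacket.clear();
    UsedResources = 0;
  }
  void add(const Insn &I) {
    if (!canReserveResources(I) || !shouldAddToPacket(I))
      endPacket();                                // close the packet first
    UsedResources |= I.Resources;
    CurrentPacket.push_back(I);
  }
  void finish() { if (!CurrentPacket.empty()) endPacket(); }
};

int main() {
  ToyPacketizer P;
  for (Insn I : {Insn{1, false}, Insn{2, false}, Insn{1, false}, Insn{4, true}})
    P.add(I);
  P.finish();   // packet of 2 / packet of 1 / packet of 1
}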
endPacket(MBB, MI); } diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index 941129b5cc95..b11b49717c45 100644 --- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -101,26 +101,22 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // Loop over all instructions in all blocks, from bottom to top, so that it's // more likely that chains of dependent but ultimately dead instructions will // be cleaned up. - for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend(); - I != E; ++I) { - MachineBasicBlock *MBB = &*I; - + for (MachineBasicBlock &MBB : make_range(MF.rbegin(), MF.rend())) { // Start out assuming that reserved registers are live out of this block. LivePhysRegs = MRI->getReservedRegs(); // Add live-ins from sucessors to LivePhysRegs. Normally, physregs are not // live across blocks, but some targets (x86) can have flags live out of a // block. - for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(), - E = MBB->succ_end(); S != E; S++) - for (MachineBasicBlock::livein_iterator LI = (*S)->livein_begin(); - LI != (*S)->livein_end(); LI++) - LivePhysRegs.set(*LI); + for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(), + E = MBB.succ_end(); S != E; S++) + for (const auto &LI : (*S)->liveins()) + LivePhysRegs.set(LI.PhysReg); // Now scan the instructions and delete dead ones, tracking physreg // liveness as we go. - for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(), - MIE = MBB->rend(); MII != MIE; ) { + for (MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), + MIE = MBB.rend(); MII != MIE; ) { MachineInstr *MI = &*MII; // If the instruction is dead, delete it! @@ -132,7 +128,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { MI->eraseFromParentAndMarkDBGValuesForRemoval(); AnyChanges = true; ++NumDeletes; - MIE = MBB->rend(); + MIE = MBB.rend(); // MII is now pointing to the next instruction to process, // so don't increment it. continue; diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp index e019dfbc8f7d..eae78a950d9a 100644 --- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -16,7 +16,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -192,9 +192,9 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { if (Resumes.empty()) return false; - // Check the personality, don't do anything if it's for MSVC. + // Check the personality, don't do anything if it's funclet-based. EHPersonality Pers = classifyEHPersonality(Fn.getPersonalityFn()); - if (isMSVCEHPersonality(Pers)) + if (isFuncletEHPersonality(Pers)) return false; LLVMContext &Ctx = Fn.getContext(); diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp index fbc4d97c4987..f3536d74111e 100644 --- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -538,11 +538,11 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { // Fix up the CFG, temporarily leave Head without any successors. 
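The DeadMachineInstructionElim hunk above keeps its existing bottom-up shape: walk a block in reverse, delete any instruction whose defs are not live and that has no side effects, and otherwise transfer liveness by killing the defs and marking the uses live. A simplified standalone model of that scan follows; it takes a ready-made live-out set instead of deriving it from reserved registers and successor live-ins, and all types are stand-ins.

#include <set>
#include <vector>
#include <iostream>

struct MI {
  std::vector<int> Defs, Uses;   // register numbers
  bool SideEffects = false;
};

void eliminateDead(std::vector<MI> &Block, std::set<int> Live) {
  for (int i = (int)Block.size() - 1; i >= 0; --i) {
    MI &I = Block[i];
    bool AnyDefLive = false;
    for (int D : I.Defs)
      AnyDefLive |= Live.count(D) != 0;
    if (!AnyDefLive && !I.SideEffects) {
      Block.erase(Block.begin() + i);   // dead: nothing later reads its defs
      continue;
    }
    for (int D : I.Defs) Live.erase(D); // defs end earlier liveness
    for (int U : I.Uses) Live.insert(U);// uses become live above this point
  }
}

int main() {
  // r2 = f(r1); r3 = g(r2); only r2 is live out, so the def of r3 is dead.
  std::vector<MI> Block = {{{2}, {1}}, {{3}, {2}}};
  eliminateDead(Block, {2});
  std::cout << Block.size() << "\n";    // 1
}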
Head->removeSuccessor(TBB); - Head->removeSuccessor(FBB); + Head->removeSuccessor(FBB, true); if (TBB != Tail) - TBB->removeSuccessor(Tail); + TBB->removeSuccessor(Tail, true); if (FBB != Tail) - FBB->removeSuccessor(Tail); + FBB->removeSuccessor(Tail, true); // Fix up Head's terminators. // It should become a single branch or a fallthrough. diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp index 5b09cf1a0fd7..c550008da025 100644 --- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp +++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp @@ -375,9 +375,8 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // This is the entry block. if (MBB->pred_empty()) { - for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), - e = MBB->livein_end(); i != e; ++i) { - for (int rx : regIndices(*i)) { + for (const auto &LI : MBB->liveins()) { + for (int rx : regIndices(LI.PhysReg)) { // Treat function live-ins as if they were defined just before the first // instruction. Usually, function arguments are set up immediately // before the call. @@ -559,12 +558,11 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) { MachineInstr *UndefMI = UndefReads.back().first; unsigned OpIdx = UndefReads.back().second; - for (MachineBasicBlock::reverse_iterator I = MBB->rbegin(), E = MBB->rend(); - I != E; ++I) { + for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) { // Update liveness, including the current instruction's defs. - LiveRegSet.stepBackward(*I); + LiveRegSet.stepBackward(I); - if (UndefMI == &*I) { + if (UndefMI == &I) { if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg())) TII->breakPartialRegDependency(UndefMI, OpIdx, TRI); @@ -733,12 +731,13 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { // If no relevant registers are used in the function, we can skip it // completely. bool anyregs = false; - for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); - I != E; ++I) - if (MF->getRegInfo().isPhysRegUsed(*I)) { + const MachineRegisterInfo &MRI = mf.getRegInfo(); + for (unsigned Reg : *RC) { + if (MRI.isPhysRegUsed(Reg)) { anyregs = true; break; } + } if (!anyregs) return false; // Initialize the AliasMap on the first use. @@ -752,7 +751,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { AliasMap[*AI].push_back(i); } - MachineBasicBlock *Entry = MF->begin(); + MachineBasicBlock *Entry = &*MF->begin(); ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry); SmallVector<MachineBasicBlock*, 16> Loops; for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator @@ -761,22 +760,19 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { enterBasicBlock(MBB); if (SeenUnknownBackEdge) Loops.push_back(MBB); - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) - visitInstr(I); + for (MachineInstr &MI : *MBB) + visitInstr(&MI); processUndefReads(MBB); leaveBasicBlock(MBB); } // Visit all the loop blocks again in order to merge DomainValues from // back-edges. 
- for (unsigned i = 0, e = Loops.size(); i != e; ++i) { - MachineBasicBlock *MBB = Loops[i]; + for (MachineBasicBlock *MBB : Loops) { enterBasicBlock(MBB); - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) - if (!I->isDebugValue()) - processDefs(I, false); + for (MachineInstr &MI : *MBB) + if (!MI.isDebugValue()) + processDefs(&MI, false); processUndefReads(MBB); leaveBasicBlock(MBB); } diff --git a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp index 55e809e24278..90ddac94f93b 100644 --- a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp @@ -50,7 +50,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { // Iterate through each instruction in the function, looking for pseudos. for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = I; + MachineBasicBlock *MBB = &*I; for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); MBBI != MBBE; ) { MachineInstr *MI = MBBI++; @@ -63,7 +63,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { // The expansion may involve new basic blocks. if (NewMBB != MBB) { MBB = NewMBB; - I = NewMBB; + I = NewMBB->getIterator(); MBBI = NewMBB->begin(); MBBE = NewMBB->end(); } diff --git a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp new file mode 100644 index 000000000000..8b2f505ff028 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp @@ -0,0 +1,55 @@ +//===-- FuncletLayout.cpp - Contiguously lay out funclets -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements basic block placement transformations which result in +// funclets being contiguous. +// +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +using namespace llvm; + +#define DEBUG_TYPE "funclet-layout" + +namespace { +class FuncletLayout : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + FuncletLayout() : MachineFunctionPass(ID) { + initializeFuncletLayoutPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F) override; +}; +} + +char FuncletLayout::ID = 0; +char &llvm::FuncletLayoutID = FuncletLayout::ID; +INITIALIZE_PASS(FuncletLayout, "funclet-layout", + "Contiguously Lay Out Funclets", false, false) + +bool FuncletLayout::runOnMachineFunction(MachineFunction &F) { + DenseMap<const MachineBasicBlock *, int> FuncletMembership = + getFuncletMembership(F); + if (FuncletMembership.empty()) + return false; + + F.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) { + auto FuncletX = FuncletMembership.find(&X); + auto FuncletY = FuncletMembership.find(&Y); + assert(FuncletX != FuncletMembership.end()); + assert(FuncletY != FuncletMembership.end()); + return FuncletX->second < FuncletY->second; + }); + + // Conservatively assume we changed something. 
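The new FuncletLayout pass amounts to a stable sort of the block list by funclet membership, so each funclet's blocks become contiguous while the relative order inside a funclet is preserved. The sketch below shows that idea with strings and an explicit id map standing in for MachineBasicBlocks and getFuncletMembership().

#include <algorithm>
#include <map>
#include <string>
#include <vector>
#include <iostream>

int main() {
  std::vector<std::string> Blocks = {"entry", "catch.body", "cont", "catch.ret"};
  std::map<std::string, int> FuncletMembership = {
      {"entry", 0}, {"cont", 0}, {"catch.body", 1}, {"catch.ret", 1}};

  // Stable sort keeps the original order within each funclet.
  std::stable_sort(Blocks.begin(), Blocks.end(),
                   [&](const std::string &X, const std::string &Y) {
                     return FuncletMembership.at(X) < FuncletMembership.at(Y);
                   });

  for (const std::string &B : Blocks)
    std::cout << B << " ";              // entry cont catch.body catch.ret
  std::cout << "\n";
}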
+ return true; +} diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp index d8edd7e4063f..484d31737b2e 100644 --- a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp +++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp @@ -158,7 +158,7 @@ static bool InsertRootInitializers(Function &F, AllocaInst **Roots, // Search for initializers in the initial BB. SmallPtrSet<AllocaInst *, 16> InitedRoots; - for (; !CouldBecomeSafePoint(IP); ++IP) + for (; !CouldBecomeSafePoint(&*IP); ++IP) if (StoreInst *SI = dyn_cast<StoreInst>(IP)) if (AllocaInst *AI = dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts())) @@ -320,7 +320,9 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) { RI = FI->removeStackRoot(RI); } else { - RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); + unsigned FrameReg; // FIXME: surely GCRoot ought to store the + // register that the offset is from? + RI->StackOffset = TFI->getFrameIndexReference(MF, RI->Num, FrameReg); ++RI; } } diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp index 6f9e8394081e..dd9a84086181 100644 --- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp @@ -108,10 +108,9 @@ EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, // FIXME: this could be a transitional option, and we probably need to remove // it if only we are sure this optimization could always benefit all targets. -static cl::opt<bool> +static cl::opt<cl::boolOrDefault> EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, - cl::desc("Enable global merge pass on external linkage"), - cl::init(false)); + cl::desc("Enable global merge pass on external linkage")); STATISTIC(NumMerged, "Number of globals merged"); namespace { @@ -129,11 +128,14 @@ namespace { /// FIXME: This could learn about optsize, and be used in the cost model. bool OnlyOptimizeForSize; + /// Whether we should merge global variables that have external linkage. + bool MergeExternalGlobals; + bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const; /// \brief Merge everything in \p Globals for which the corresponding bit /// in \p GlobalSet is set. - bool doMerge(SmallVectorImpl<GlobalVariable *> &Globals, + bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, const BitVector &GlobalSet, Module &M, bool isConst, unsigned AddrSpace) const; @@ -158,9 +160,11 @@ namespace { static char ID; // Pass identification, replacement for typeid. 
explicit GlobalMerge(const TargetMachine *TM = nullptr, unsigned MaximalOffset = 0, - bool OnlyOptimizeForSize = false) + bool OnlyOptimizeForSize = false, + bool MergeExternalGlobals = false) : FunctionPass(ID), TM(TM), MaxOffset(MaximalOffset), - OnlyOptimizeForSize(OnlyOptimizeForSize) { + OnlyOptimizeForSize(OnlyOptimizeForSize), + MergeExternalGlobals(MergeExternalGlobals) { initializeGlobalMergePass(*PassRegistry::getPassRegistry()); } @@ -189,14 +193,11 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const { auto &DL = M.getDataLayout(); // FIXME: Find better heuristics - std::stable_sort( - Globals.begin(), Globals.end(), - [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { - Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType(); - Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType(); - - return (DL.getTypeAllocSize(Ty1) < DL.getTypeAllocSize(Ty2)); - }); + std::stable_sort(Globals.begin(), Globals.end(), + [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { + return DL.getTypeAllocSize(GV1->getValueType()) < + DL.getTypeAllocSize(GV2->getValueType()); + }); // If we want to just blindly group all globals together, do so. if (!GlobalMergeGroupByUse) { @@ -207,7 +208,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, // If we want to be smarter, look at all uses of each global, to try to // discover all sets of globals used together, and how many times each of - // these sets occured. + // these sets occurred. // // Keep this reasonably efficient, by having an append-only list of all sets // discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of @@ -302,8 +303,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Function *ParentFn = I->getParent()->getParent(); // If we're only optimizing for size, ignore non-minsize functions. - if (OnlyOptimizeForSize && - !ParentFn->hasFnAttribute(Attribute::MinSize)) + if (OnlyOptimizeForSize && !ParentFn->optForMinSize()) continue; size_t UGSIdx = GlobalUsesByFunction[ParentFn]; @@ -406,15 +406,14 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, return Changed; } -bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable *> &Globals, +bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, const BitVector &GlobalSet, Module &M, bool isConst, unsigned AddrSpace) const { + assert(Globals.size() > 1); Type *Int32Ty = Type::getInt32Ty(M.getContext()); auto &DL = M.getDataLayout(); - assert(Globals.size() > 1); - DEBUG(dbgs() << " Trying to merge set, starts with #" << GlobalSet.find_first() << "\n"); @@ -425,58 +424,44 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable *> &Globals, std::vector<Type*> Tys; std::vector<Constant*> Inits; - bool HasExternal = false; - GlobalVariable *TheFirstExternal = 0; for (j = i; j != -1; j = GlobalSet.find_next(j)) { - Type *Ty = Globals[j]->getType()->getElementType(); + Type *Ty = Globals[j]->getValueType(); MergedSize += DL.getTypeAllocSize(Ty); if (MergedSize > MaxOffset) { break; } Tys.push_back(Ty); Inits.push_back(Globals[j]->getInitializer()); - - if (Globals[j]->hasExternalLinkage() && !HasExternal) { - HasExternal = true; - TheFirstExternal = Globals[j]; - } } - // If merged variables doesn't have external linkage, we needn't to expose - // the symbol after merging. - GlobalValue::LinkageTypes Linkage = HasExternal - ? 
GlobalValue::ExternalLinkage - : GlobalValue::InternalLinkage; - StructType *MergedTy = StructType::get(M.getContext(), Tys); Constant *MergedInit = ConstantStruct::get(MergedTy, Inits); - // If merged variables have external linkage, we use symbol name of the - // first variable merged as the suffix of global symbol name. This would - // be able to avoid the link-time naming conflict for globalm symbols. GlobalVariable *MergedGV = new GlobalVariable( - M, MergedTy, isConst, Linkage, MergedInit, - HasExternal ? "_MergedGlobals_" + TheFirstExternal->getName() - : "_MergedGlobals", - nullptr, GlobalVariable::NotThreadLocal, AddrSpace); + M, MergedTy, isConst, GlobalValue::PrivateLinkage, MergedInit, + "_MergedGlobals", nullptr, GlobalVariable::NotThreadLocal, AddrSpace); - for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k)) { + for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) { GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); std::string Name = Globals[k]->getName(); Constant *Idx[2] = { ConstantInt::get(Int32Ty, 0), - ConstantInt::get(Int32Ty, idx++) + ConstantInt::get(Int32Ty, idx), }; Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx); Globals[k]->replaceAllUsesWith(GEP); Globals[k]->eraseFromParent(); - if (Linkage != GlobalValue::InternalLinkage) { - // Generate a new alias... - auto *PTy = cast<PointerType>(GEP->getType()); - GlobalAlias::create(PTy, Linkage, Name, GEP, &M); + // When the linkage is not internal we must emit an alias for the original + // variable name as it may be accessed from another object. On non-Mach-O + // we can also emit an alias for internal linkage as it's safe to do so. + // It's not safe on Mach-O as the alias (and thus the portion of the + // MergedGlobals variable) may be dead stripped at link time. + if (Linkage != GlobalValue::InternalLinkage || + !TM->getTargetTriple().isOSBinFormatMachO()) { + GlobalAlias::create(Tys[idx], AddrSpace, Linkage, Name, GEP, &M); } NumMerged++; @@ -535,61 +520,57 @@ bool GlobalMerge::doInitialization(Module &M) { setMustKeepGlobalVariables(M); // Grab all non-const globals. - for (Module::global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) { + for (auto &GV : M.globals()) { // Merge is safe for "normal" internal or external globals only - if (I->isDeclaration() || I->isThreadLocal() || I->hasSection()) + if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasSection()) continue; - if (!(EnableGlobalMergeOnExternal && I->hasExternalLinkage()) && - !I->hasInternalLinkage()) + if (!(MergeExternalGlobals && GV.hasExternalLinkage()) && + !GV.hasInternalLinkage()) continue; - PointerType *PT = dyn_cast<PointerType>(I->getType()); + PointerType *PT = dyn_cast<PointerType>(GV.getType()); assert(PT && "Global variable is not a pointer!"); unsigned AddressSpace = PT->getAddressSpace(); // Ignore fancy-aligned globals for now. - unsigned Alignment = DL.getPreferredAlignment(I); - Type *Ty = I->getType()->getElementType(); + unsigned Alignment = DL.getPreferredAlignment(&GV); + Type *Ty = GV.getValueType(); if (Alignment > DL.getABITypeAlignment(Ty)) continue; // Ignore all 'special' globals. 
- if (I->getName().startswith("llvm.") || - I->getName().startswith(".llvm.")) + if (GV.getName().startswith("llvm.") || + GV.getName().startswith(".llvm.")) continue; // Ignore all "required" globals: - if (isMustKeepGlobalVariable(I)) + if (isMustKeepGlobalVariable(&GV)) continue; if (DL.getTypeAllocSize(Ty) < MaxOffset) { - if (TargetLoweringObjectFile::getKindForGlobal(I, *TM).isBSSLocal()) - BSSGlobals[AddressSpace].push_back(I); - else if (I->isConstant()) - ConstGlobals[AddressSpace].push_back(I); + if (TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal()) + BSSGlobals[AddressSpace].push_back(&GV); + else if (GV.isConstant()) + ConstGlobals[AddressSpace].push_back(&GV); else - Globals[AddressSpace].push_back(I); + Globals[AddressSpace].push_back(&GV); } } - for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator - I = Globals.begin(), E = Globals.end(); I != E; ++I) - if (I->second.size() > 1) - Changed |= doMerge(I->second, M, false, I->first); + for (auto &P : Globals) + if (P.second.size() > 1) + Changed |= doMerge(P.second, M, false, P.first); - for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator - I = BSSGlobals.begin(), E = BSSGlobals.end(); I != E; ++I) - if (I->second.size() > 1) - Changed |= doMerge(I->second, M, false, I->first); + for (auto &P : BSSGlobals) + if (P.second.size() > 1) + Changed |= doMerge(P.second, M, false, P.first); if (EnableGlobalMergeOnConst) - for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator - I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I) - if (I->second.size() > 1) - Changed |= doMerge(I->second, M, true, I->first); + for (auto &P : ConstGlobals) + if (P.second.size() > 1) + Changed |= doMerge(P.second, M, true, P.first); return Changed; } @@ -604,6 +585,9 @@ bool GlobalMerge::doFinalization(Module &M) { } Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset, - bool OnlyOptimizeForSize) { - return new GlobalMerge(TM, Offset, OnlyOptimizeForSize); + bool OnlyOptimizeForSize, + bool MergeExternalByDefault) { + bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ? 
+ MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE); + return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal); } diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index ee0532bfc630..c38c9d22266e 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -32,6 +32,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> using namespace llvm; @@ -190,10 +191,10 @@ namespace { private: bool ReverseBranchCondition(BBInfo &BBI); bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups, - const BranchProbability &Prediction) const; + BranchProbability Prediction) const; bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, - const BranchProbability &Prediction) const; + BranchProbability Prediction) const; bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, unsigned &Dups1, unsigned &Dups2) const; void ScanInstructions(BBInfo &BBI); @@ -218,7 +219,7 @@ namespace { bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Cycle, unsigned Extra, - const BranchProbability &Prediction) const { + BranchProbability Prediction) const { return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra, Prediction); } @@ -227,7 +228,7 @@ namespace { unsigned TCycle, unsigned TExtra, MachineBasicBlock &FBB, unsigned FCycle, unsigned FExtra, - const BranchProbability &Prediction) const { + BranchProbability Prediction) const { return TCycle > 0 && FCycle > 0 && TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, Prediction); @@ -462,11 +463,11 @@ bool IfConverter::ReverseBranchCondition(BBInfo &BBI) { /// getNextBlock - Returns the next block in the function blocks ordering. If /// it is the end, returns NULL. static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { - MachineFunction::iterator I = BB; + MachineFunction::iterator I = BB->getIterator(); MachineFunction::iterator E = BB->getParent()->end(); if (++I == E) return nullptr; - return I; + return &*I; } /// ValidSimple - Returns true if the 'true' block (along with its @@ -474,7 +475,7 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { /// number of instructions that the ifcvt would need to duplicate if performed /// in Dups. bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, - const BranchProbability &Prediction) const { + BranchProbability Prediction) const { Dups = 0; if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -501,7 +502,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, /// if performed in 'Dups'. bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, - const BranchProbability &Prediction) const { + BranchProbability Prediction) const { Dups = 0; if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -530,10 +531,10 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB; if (!TExit && blockAlwaysFallThrough(TrueBBI)) { - MachineFunction::iterator I = TrueBBI.BB; + MachineFunction::iterator I = TrueBBI.BB->getIterator(); if (++I == TrueBBI.BB->getParent()->end()) return false; - TExit = I; + TExit = &*I; } return TExit && TExit == FalseBBI.BB; } @@ -948,10 +949,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB, /// candidates. 
void IfConverter::AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens) { - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *BB = I; - AnalyzeBlock(BB, Tokens); - } + for (auto &BB : MF) + AnalyzeBlock(&BB, Tokens); // Sort to favor more complex ifcvt scheme. std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp); @@ -961,14 +960,14 @@ void IfConverter::AnalyzeBlocks(MachineFunction &MF, /// that all the intervening blocks are empty (given BB can fall through to its /// next block). static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) { - MachineFunction::iterator PI = BB; + MachineFunction::iterator PI = BB->getIterator(); MachineFunction::iterator I = std::next(PI); - MachineFunction::iterator TI = ToBB; + MachineFunction::iterator TI = ToBB->getIterator(); MachineFunction::iterator E = BB->getParent()->end(); while (I != TI) { // Check isSuccessor to avoid case where the next block is empty, but // it's not a successor. - if (I == E || !I->empty() || !PI->isSuccessor(I)) + if (I == E || !I->empty() || !PI->isSuccessor(&*I)) return false; PI = I++; } @@ -1114,7 +1113,7 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. - BBI.BB->removeSuccessor(CvtBBI->BB); + BBI.BB->removeSuccessor(CvtBBI->BB, true); } else { RemoveKills(CvtBBI->BB->begin(), CvtBBI->BB->end(), DontKill, *TRI); PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); @@ -1153,28 +1152,6 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { return true; } -/// Scale down weights to fit into uint32_t. NewTrue is the new weight -/// for successor TrueBB, and NewFalse is the new weight for successor -/// FalseBB. -static void ScaleWeights(uint64_t NewTrue, uint64_t NewFalse, - MachineBasicBlock *MBB, - const MachineBasicBlock *TrueBB, - const MachineBasicBlock *FalseBB, - const MachineBranchProbabilityInfo *MBPI) { - uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; - uint32_t Scale = (NewMax / UINT32_MAX) + 1; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); - SI != SE; ++SI) { - if (*SI == TrueBB) - MBB->setSuccWeight(SI, (uint32_t)(NewTrue / Scale)); - else if (*SI == FalseBB) - MBB->setSuccWeight(SI, (uint32_t)(NewFalse / Scale)); - else - MBB->setSuccWeight(SI, MBPI->getEdgeWeight(MBB, SI) / Scale); - } -} - /// IfConvertTriangle - If convert a triangle sub-CFG. /// bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { @@ -1231,16 +1208,14 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { DontKill.clear(); bool HasEarlyExit = CvtBBI->FalseBB != nullptr; - uint64_t CvtNext = 0, CvtFalse = 0, BBNext = 0, BBCvt = 0, SumWeight = 0; - uint32_t WeightScale = 0; + BranchProbability CvtNext, CvtFalse, BBNext, BBCvt; if (HasEarlyExit) { - // Get weights before modifying CvtBBI->BB and BBI.BB. - CvtNext = MBPI->getEdgeWeight(CvtBBI->BB, NextBBI->BB); - CvtFalse = MBPI->getEdgeWeight(CvtBBI->BB, CvtBBI->FalseBB); - BBNext = MBPI->getEdgeWeight(BBI.BB, NextBBI->BB); - BBCvt = MBPI->getEdgeWeight(BBI.BB, CvtBBI->BB); - SumWeight = MBPI->getSumForBlock(CvtBBI->BB, WeightScale); + // Get probabilities before modifying CvtBBI->BB and BBI.BB. 
+ CvtNext = MBPI->getEdgeProbability(CvtBBI->BB, NextBBI->BB); + CvtFalse = MBPI->getEdgeProbability(CvtBBI->BB, CvtBBI->FalseBB); + BBNext = MBPI->getEdgeProbability(BBI.BB, NextBBI->BB); + BBCvt = MBPI->getEdgeProbability(BBI.BB, CvtBBI->BB); } if (CvtBBI->BB->pred_size() > 1) { @@ -1251,7 +1226,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. - BBI.BB->removeSuccessor(CvtBBI->BB); + BBI.BB->removeSuccessor(CvtBBI->BB, true); } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); @@ -1268,22 +1243,23 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { CvtBBI->BrCond.end()); if (TII->ReverseBranchCondition(RevCond)) llvm_unreachable("Unable to reverse branch condition!"); + + // Update the edge probability for both CvtBBI->FalseBB and NextBBI. + // NewNext = New_Prob(BBI.BB, NextBBI->BB) = + // Prob(BBI.BB, NextBBI->BB) + + // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, NextBBI->BB) + // NewFalse = New_Prob(BBI.BB, CvtBBI->FalseBB) = + // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, CvtBBI->FalseBB) + auto NewTrueBB = getNextBlock(BBI.BB); + auto NewNext = BBNext + BBCvt * CvtNext; + auto NewTrueBBIter = + std::find(BBI.BB->succ_begin(), BBI.BB->succ_end(), NewTrueBB); + if (NewTrueBBIter != BBI.BB->succ_end()) + BBI.BB->setSuccProbability(NewTrueBBIter, NewNext); + + auto NewFalse = BBCvt * CvtFalse; TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl); - BBI.BB->addSuccessor(CvtBBI->FalseBB); - // Update the edge weight for both CvtBBI->FalseBB and NextBBI. - // New_Weight(BBI.BB, NextBBI->BB) = - // Weight(BBI.BB, NextBBI->BB) * getSumForBlock(CvtBBI->BB) + - // Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, NextBBI->BB) - // New_Weight(BBI.BB, CvtBBI->FalseBB) = - // Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, CvtBBI->FalseBB) - - uint64_t NewNext = BBNext * SumWeight + (BBCvt * CvtNext) / WeightScale; - uint64_t NewFalse = (BBCvt * CvtFalse) / WeightScale; - // We need to scale down all weights of BBI.BB to fit uint32_t. - // Here BBI.BB is connected to CvtBBI->FalseBB and will fall through to - // the next block. - ScaleWeights(NewNext, NewFalse, BBI.BB, getNextBlock(BBI.BB), - CvtBBI->FalseBB, MBPI); + BBI.BB->addSuccessor(CvtBBI->FalseBB, NewFalse); } // Merge in the 'false' block if the 'false' block has no other @@ -1526,7 +1502,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, MergeBlocks(BBI, TailBBI); TailBBI.IsDone = true; } else { - BBI.BB->addSuccessor(TailBB); + BBI.BB->addSuccessor(TailBB, BranchProbability::getOne()); InsertUncondBranch(BBI.BB, TailBB, TII); BBI.HasFallThrough = false; } @@ -1536,7 +1512,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // which can happen here if TailBB is unanalyzable and is merged, so // explicitly remove BBI1 and BBI2 as successors. BBI.BB->removeSuccessor(BBI1->BB); - BBI.BB->removeSuccessor(BBI2->BB); + BBI.BB->removeSuccessor(BBI2->BB, true); RemoveExtraEdges(BBI); // Update block info. 
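Aside: the IfConvertTriangle hunk above drops the old integer-weight scaling (ScaleWeights) in favor of BranchProbability arithmetic. Once the 'true' block is predicated into its predecessor, the probability mass that used to flow along BBI.BB -> CvtBBI->BB is redistributed onto CvtBBI->BB's old successors, exactly as the New_Prob comments describe. The following is a minimal standalone sketch of that arithmetic with made-up edge probabilities; the pass itself works on llvm::BranchProbability values, not double.

// Standalone sketch of the triangle if-conversion probability update
// (illustrative numbers only; not the pass's actual code).
#include <cstdio>

int main() {
  // Hypothetical edge probabilities before conversion:
  //   BBI.BB     -> NextBBI->BB      (BBNext)
  //   BBI.BB     -> CvtBBI->BB       (BBCvt)
  //   CvtBBI->BB -> NextBBI->BB      (CvtNext)
  //   CvtBBI->BB -> CvtBBI->FalseBB  (CvtFalse)
  double BBNext = 0.30, BBCvt = 0.70;
  double CvtNext = 0.80, CvtFalse = 0.20;

  // After CvtBBI->BB is folded into BBI.BB, paths that used to go through
  // CvtBBI->BB land directly on BBI.BB's remaining out-edges:
  double NewNext  = BBNext + BBCvt * CvtNext;  // 0.30 + 0.70 * 0.80 = 0.86
  double NewFalse = BBCvt * CvtFalse;          // 0.70 * 0.20        = 0.14

  std::printf("NewNext = %.2f, NewFalse = %.2f\n", NewNext, NewFalse);
  return 0;  // NewNext + NewFalse == 1.0 here, i.e. BBI.BB's outgoing mass is preserved.
}

With the outgoing mass preserved by construction, no separate rescaling pass over the successor list (the removed ScaleWeights helper) is needed; the later normalization calls only have to clean up rounding.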
@@ -1686,25 +1662,94 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { ToBBI.BB->splice(ToBBI.BB->end(), FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end()); - std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(), - FromBBI.BB->succ_end()); + // Force normalizing the successors' probabilities of ToBBI.BB to convert all + // unknown probabilities into known ones. + // FIXME: This usage is too tricky and in the future we would like to + // eliminate all unknown probabilities in MBB. + ToBBI.BB->normalizeSuccProbs(); + + SmallVector<MachineBasicBlock *, 4> FromSuccs(FromBBI.BB->succ_begin(), + FromBBI.BB->succ_end()); MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr; + // The edge probability from ToBBI.BB to FromBBI.BB, which is only needed when + // AddEdges is true and FromBBI.BB is a successor of ToBBI.BB. + auto To2FromProb = BranchProbability::getZero(); + if (AddEdges && ToBBI.BB->isSuccessor(FromBBI.BB)) { + To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, FromBBI.BB); + // Set the edge probability from ToBBI.BB to FromBBI.BB to zero to avoid the + // edge probability being merged to other edges when this edge is removed + // later. + ToBBI.BB->setSuccProbability( + std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), FromBBI.BB), + BranchProbability::getZero()); + } - for (unsigned i = 0, e = Succs.size(); i != e; ++i) { - MachineBasicBlock *Succ = Succs[i]; + for (unsigned i = 0, e = FromSuccs.size(); i != e; ++i) { + MachineBasicBlock *Succ = FromSuccs[i]; // Fallthrough edge can't be transferred. if (Succ == FallThrough) continue; + + auto NewProb = BranchProbability::getZero(); + if (AddEdges) { + // Calculate the edge probability for the edge from ToBBI.BB to Succ, + // which is a portion of the edge probability from FromBBI.BB to Succ. The + // portion ratio is the edge probability from ToBBI.BB to FromBBI.BB (if + // FromBBI is a successor of ToBBI.BB. See comment below for excepion). + NewProb = MBPI->getEdgeProbability(FromBBI.BB, Succ); + + // To2FromProb is 0 when FromBBI.BB is not a successor of ToBBI.BB. This + // only happens when if-converting a diamond CFG and FromBBI.BB is the + // tail BB. In this case FromBBI.BB post-dominates ToBBI.BB and hence we + // could just use the probabilities on FromBBI.BB's out-edges when adding + // new successors. + if (!To2FromProb.isZero()) + NewProb *= To2FromProb; + } + FromBBI.BB->removeSuccessor(Succ); - if (AddEdges && !ToBBI.BB->isSuccessor(Succ)) - ToBBI.BB->addSuccessor(Succ); + + if (AddEdges) { + // If the edge from ToBBI.BB to Succ already exists, update the + // probability of this edge by adding NewProb to it. An example is shown + // below, in which A is ToBBI.BB and B is FromBBI.BB. In this case we + // don't have to set C as A's successor as it already is. We only need to + // update the edge probability on A->C. Note that B will not be + // immediately removed from A's successors. It is possible that B->D is + // not removed either if D is a fallthrough of B. Later the edge A->D + // (generated here) and B->D will be combined into one edge. To maintain + // correct edge probability of this combined edge, we need to set the edge + // probability of A->B to zero, which is already done above. The edge + // probability on A->D is calculated by scaling the original probability + // on A->B by the probability of B->D. 
+ // + // Before ifcvt: After ifcvt (assume B->D is kept): + // + // A A + // /| /|\ + // / B / B| + // | /| | || + // |/ | | |/ + // C D C D + // + if (ToBBI.BB->isSuccessor(Succ)) + ToBBI.BB->setSuccProbability( + std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), Succ), + MBPI->getEdgeProbability(ToBBI.BB, Succ) + NewProb); + else + ToBBI.BB->addSuccessor(Succ, NewProb); + } } // Now FromBBI always falls through to the next block! if (NBB && !FromBBI.BB->isSuccessor(NBB)) FromBBI.BB->addSuccessor(NBB); + // Normalize the probabilities of ToBBI.BB's successors with all adjustment + // we've done above. + ToBBI.BB->normalizeSuccProbs(); + ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end()); FromBBI.Predicate.clear(); diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp index 93e04876a8f3..39c1b9fb9a66 100644 --- a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -38,6 +38,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -107,6 +108,98 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; }; + +/// \brief Detect re-ordering hazards and dependencies. +/// +/// This class keeps track of defs and uses, and can be queried if a given +/// machine instruction can be re-ordered from after the machine instructions +/// seen so far to before them. +class HazardDetector { + DenseSet<unsigned> RegDefs; + DenseSet<unsigned> RegUses; + const TargetRegisterInfo &TRI; + bool hasSeenClobber; + +public: + explicit HazardDetector(const TargetRegisterInfo &TRI) : + TRI(TRI), hasSeenClobber(false) {} + + /// \brief Make a note of \p MI for later queries to isSafeToHoist. + /// + /// May clobber this HazardDetector instance. \see isClobbered. + void rememberInstruction(MachineInstr *MI); + + /// \brief Return true if it is safe to hoist \p MI from after all the + /// instructions seen so far (via rememberInstruction) to before it. + bool isSafeToHoist(MachineInstr *MI); + + /// \brief Return true if this instance of HazardDetector has been clobbered + /// (i.e. has no more useful information). + /// + /// A HazardDetecter is clobbered when it sees a construct it cannot + /// understand, and it would have to return a conservative answer for all + /// future queries. Having a separate clobbered state lets the client code + /// bail early, without making queries about all of the future instructions + /// (which would have returned the most conservative answer anyway). + /// + /// Calling rememberInstruction or isSafeToHoist on a clobbered HazardDetector + /// is an error. + bool isClobbered() { return hasSeenClobber; } +}; +} + + +void HazardDetector::rememberInstruction(MachineInstr *MI) { + assert(!isClobbered() && + "Don't add instructions to a clobbered hazard detector"); + + if (MI->mayStore() || MI->hasUnmodeledSideEffects()) { + hasSeenClobber = true; + return; + } + + for (auto *MMO : MI->memoperands()) { + // Right now we don't want to worry about LLVM's memory model. 
+ if (!MMO->isUnordered()) { + hasSeenClobber = true; + return; + } + } + + for (auto &MO : MI->operands()) { + if (!MO.isReg() || !MO.getReg()) + continue; + + if (MO.isDef()) + RegDefs.insert(MO.getReg()); + else + RegUses.insert(MO.getReg()); + } +} + +bool HazardDetector::isSafeToHoist(MachineInstr *MI) { + assert(!isClobbered() && "isSafeToHoist cannot do anything useful!"); + + // Right now we don't want to worry about LLVM's memory model. This can be + // made more precise later. + for (auto *MMO : MI->memoperands()) + if (!MMO->isUnordered()) + return false; + + for (auto &MO : MI->operands()) { + if (MO.isReg() && MO.getReg()) { + for (unsigned Reg : RegDefs) + if (TRI.regsOverlap(Reg, MO.getReg())) + return false; // We found a write-after-write or read-after-write + + if (MO.isDef()) + for (unsigned Reg : RegUses) + if (TRI.regsOverlap(Reg, MO.getReg())) + return false; // We found a write-after-read + } + } + + return true; } bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) { @@ -132,10 +225,10 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( MachineBasicBlock &MBB, SmallVectorImpl<NullCheck> &NullCheckList) { typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate; - MDNode *BranchMD = - MBB.getBasicBlock() - ? MBB.getBasicBlock()->getTerminator()->getMetadata("make.implicit") - : nullptr; + MDNode *BranchMD = nullptr; + if (auto *BB = MBB.getBasicBlock()) + BranchMD = BB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit); + if (!BranchMD) return false; @@ -188,7 +281,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // // we want to end up with // - // Def = TrappingLoad (%RAX + <offset>), LblNull + // Def = FaultingLoad (%RAX + <offset>), LblNull // jmp LblNotNull ;; explicit or fallthrough // // LblNotNull: @@ -199,38 +292,34 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // LblNull: // callq throw_NullPointerException // + // + // To see why this is legal, consider the two possibilities: + // + // 1. %RAX is null: since we constrain <offset> to be less than PageSize, the + // load instruction dereferences the null page, causing a segmentation + // fault. + // + // 2. %RAX is not null: in this case we know that the load cannot fault, as + // otherwise the load would've faulted in the original program too and the + // original program would've been undefined. + // + // This reasoning cannot be extended to justify hoisting through arbitrary + // control flow. For instance, in the example below (in pseudo-C) + // + // if (ptr == null) { throw_npe(); unreachable; } + // if (some_cond) { return 42; } + // v = ptr->field; // LD + // ... + // + // we cannot (without code duplication) use the load marked "LD" to null check + // ptr -- clause (2) above does not apply in this case. In the above program + // the safety of ptr->field can be dependent on some_cond; and, for instance, + // ptr could be some non-null invalid reference that never gets loaded from + // because some_cond is always true. unsigned PointerReg = MBP.LHS.getReg(); - // As we scan NotNullSucc for a suitable load instruction, we keep track of - // the registers defined and used by the instructions we scan past. This bit - // of information lets us decide if it is legal to hoist the load instruction - // we find (if we do find such an instruction) to before NotNullSucc. - DenseSet<unsigned> RegDefs, RegUses; - - // Returns true if it is safe to reorder MI to before NotNullSucc. 
- auto IsSafeToHoist = [&](MachineInstr *MI) { - // Right now we don't want to worry about LLVM's memory model. This can be - // made more precise later. - for (auto *MMO : MI->memoperands()) - if (!MMO->isUnordered()) - return false; - - for (auto &MO : MI->operands()) { - if (MO.isReg() && MO.getReg()) { - for (unsigned Reg : RegDefs) - if (TRI->regsOverlap(Reg, MO.getReg())) - return false; // We found a write-after-write or read-after-write - - if (MO.isDef()) - for (unsigned Reg : RegUses) - if (TRI->regsOverlap(Reg, MO.getReg())) - return false; // We found a write-after-read - } - } - - return true; - }; + HazardDetector HD(*TRI); for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE; ++MII) { @@ -238,37 +327,16 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( unsigned BaseReg, Offset; if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) if (MI->mayLoad() && !MI->isPredicable() && BaseReg == PointerReg && - Offset < PageSize && MI->getDesc().getNumDefs() == 1 && - IsSafeToHoist(MI)) { + Offset < PageSize && MI->getDesc().getNumDefs() <= 1 && + HD.isSafeToHoist(MI)) { NullCheckList.emplace_back(MI, MBP.ConditionDef, &MBB, NotNullSucc, NullSucc); return true; } - // MI did not match our criteria for conversion to a trapping load. Check - // if we can continue looking. - - if (MI->mayStore() || MI->hasUnmodeledSideEffects()) + HD.rememberInstruction(MI); + if (HD.isClobbered()) return false; - - for (auto *MMO : MI->memoperands()) - // Right now we don't want to worry about LLVM's memory model. - if (!MMO->isUnordered()) - return false; - - // It _may_ be okay to reorder a later load instruction across MI. Make a - // note of its operands so that we can make the legality check if we find a - // suitable load instruction: - - for (auto &MO : MI->operands()) { - if (!MO.isReg() || !MO.getReg()) - continue; - - if (MO.isDef()) - RegDefs.insert(MO.getReg()); - else - RegUses.insert(MO.getReg()); - } } return false; @@ -281,14 +349,19 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( MachineInstr *ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI, MachineBasicBlock *MBB, MCSymbol *HandlerLabel) { + const unsigned NoRegister = 0; // Guaranteed to be the NoRegister value for + // all targets. 
+ DebugLoc DL; unsigned NumDefs = LoadMI->getDesc().getNumDefs(); - assert(NumDefs == 1 && "other cases unhandled!"); - (void)NumDefs; + assert(NumDefs <= 1 && "other cases unhandled!"); - unsigned DefReg = LoadMI->defs().begin()->getReg(); - assert(std::distance(LoadMI->defs().begin(), LoadMI->defs().end()) == 1 && - "expected exactly one def!"); + unsigned DefReg = NoRegister; + if (NumDefs != 0) { + DefReg = LoadMI->defs().begin()->getReg(); + assert(std::distance(LoadMI->defs().begin(), LoadMI->defs().end()) == 1 && + "expected exactly one def!"); + } auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_LOAD_OP), DefReg) .addSym(HandlerLabel) diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index 9989f233d09c..e31013266bc7 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -141,7 +141,7 @@ public: InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()), LSS(pass.getAnalysis<LiveStacks>()), - AA(&pass.getAnalysis<AliasAnalysis>()), + AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()), MDT(pass.getAnalysis<MachineDominatorTree>()), Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm), MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), @@ -329,8 +329,8 @@ static raw_ostream &operator<<(raw_ostream &OS, if (SVI.KillsSource) OS << " kill"; OS << " deps["; - for (unsigned i = 0, e = SVI.Deps.size(); i != e; ++i) - OS << ' ' << SVI.Deps[i]->id << '@' << SVI.Deps[i]->def; + for (VNInfo *Dep : SVI.Deps) + OS << ' ' << Dep->id << '@' << Dep->def; OS << " ]"; if (SVI.DefMI) OS << " def: " << *SVI.DefMI; @@ -383,9 +383,8 @@ void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter, bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg); unsigned SpillDepth = ~0u; - for (TinyPtrVector<VNInfo*>::iterator DepI = Deps->begin(), - DepE = Deps->end(); DepI != DepE; ++DepI) { - SibValueMap::iterator DepSVI = SibValues.find(*DepI); + for (VNInfo *Dep : *Deps) { + SibValueMap::iterator DepSVI = SibValues.find(Dep); assert(DepSVI != SibValues.end() && "Dependent value not in SibValues"); SibValueInfo &DepSV = DepSVI->second; if (!DepSV.SpillMBB) @@ -566,12 +565,11 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, // Create entries for all the PHIs. Don't add them to the worklist, we // are processing all of them in one go here. - for (unsigned i = 0, e = PHIs.size(); i != e; ++i) - SibValues.insert(std::make_pair(PHIs[i], SibValueInfo(Reg, PHIs[i]))); + for (VNInfo *PHI : PHIs) + SibValues.insert(std::make_pair(PHI, SibValueInfo(Reg, PHI))); // Add every PHI as a dependent of all the non-PHIs. - for (unsigned i = 0, e = NonPHIs.size(); i != e; ++i) { - VNInfo *NonPHI = NonPHIs[i]; + for (VNInfo *NonPHI : NonPHIs) { // Known value? Try an insertion. 
std::tie(SVI, Inserted) = SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI))); @@ -654,8 +652,7 @@ void InlineSpiller::analyzeSiblingValues() { return; LiveInterval &OrigLI = LIS.getInterval(Original); - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { - unsigned Reg = RegsToSpill[i]; + for (unsigned Reg : RegsToSpill) { LiveInterval &LI = LIS.getInterval(Reg); for (LiveInterval::const_vni_iterator VI = LI.vni_begin(), VE = LI.vni_end(); VI != VE; ++VI) { @@ -831,9 +828,8 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { if (VNI->isPHIDef()) { MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI)); + for (MachineBasicBlock *P : MBB->predecessors()) { + VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(P)); if (PVNI) WorkList.push_back(std::make_pair(LI, PVNI)); } @@ -920,8 +916,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, << *LIS.getInstructionFromIndex(DefIdx)); // Replace operands - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second); + for (const auto &OpPair : Ops) { + MachineOperand &MO = OpPair.first->getOperand(OpPair.second); if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) { MO.setReg(NewVReg); MO.setIsKill(); @@ -944,8 +940,7 @@ void InlineSpiller::reMaterializeAll() { // Try to remat before all uses of snippets. bool anyRemat = false; - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { - unsigned Reg = RegsToSpill[i]; + for (unsigned Reg : RegsToSpill) { LiveInterval &LI = LIS.getInterval(Reg); for (MachineRegisterInfo::reg_bundle_iterator RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end(); @@ -963,8 +958,7 @@ void InlineSpiller::reMaterializeAll() { return; // Remove any values that were completely rematted. - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { - unsigned Reg = RegsToSpill[i]; + for (unsigned Reg : RegsToSpill) { LiveInterval &LI = LIS.getInterval(Reg); for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end(); I != E; ++I) { @@ -989,8 +983,7 @@ void InlineSpiller::reMaterializeAll() { // Get rid of deleted and empty intervals. unsigned ResultPos = 0; - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { - unsigned Reg = RegsToSpill[i]; + for (unsigned Reg : RegsToSpill) { if (!LIS.hasInterval(Reg)) continue; @@ -1098,9 +1091,9 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. SmallVector<unsigned, 8> FoldOps; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - unsigned Idx = Ops[i].second; - assert(MI == Ops[i].first && "Instruction conflict during operand folding"); + for (const auto &OpPair : Ops) { + unsigned Idx = OpPair.second; + assert(MI == OpPair.first && "Instruction conflict during operand folding"); MachineOperand &MO = MI->getOperand(Idx); if (MO.isImplicit()) { ImpReg = MO.getReg(); @@ -1139,7 +1132,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, continue; MIBundleOperands::PhysRegInfo RI = MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI); - if (RI.Defines) + if (RI.FullyDefined) continue; // FoldMI does not define this physreg. Remove the LI segment. 
assert(MO->isDead() && "Cannot fold physreg def"); @@ -1152,10 +1145,9 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, // Insert any new instructions other than FoldMI into the LIS maps. assert(!MIS.empty() && "Unexpected empty span of instructions!"); - for (MachineBasicBlock::iterator MII = MIS.begin(), End = MIS.end(); - MII != End; ++MII) - if (&*MII != FoldMI) - LIS.InsertMachineInstrInMaps(&*MII); + for (MachineInstr &MI : MIS) + if (&MI != FoldMI) + LIS.InsertMachineInstrInMaps(&MI); // TII.foldMemoryOperand may have left some implicit operands on the // instruction. Strip them. @@ -1301,11 +1293,11 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Rewrite instruction operands. bool hasLiveDef = false; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second); + for (const auto &OpPair : Ops) { + MachineOperand &MO = OpPair.first->getOperand(OpPair.second); MO.setReg(NewVReg); if (MO.isUse()) { - if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second)) + if (!OpPair.first->isRegTiedToDefOperand(OpPair.second)) MO.setIsKill(); } else { if (!MO.isDead()) @@ -1335,14 +1327,14 @@ void InlineSpiller::spillAll() { VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot); assert(StackInt->getNumValNums() == 1 && "Bad stack interval values"); - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - StackInt->MergeSegmentsInAsValue(LIS.getInterval(RegsToSpill[i]), + for (unsigned Reg : RegsToSpill) + StackInt->MergeSegmentsInAsValue(LIS.getInterval(Reg), StackInt->getValNumInfo(0)); DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n'); // Spill around uses of all RegsToSpill. - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - spillAroundUses(RegsToSpill[i]); + for (unsigned Reg : RegsToSpill) + spillAroundUses(Reg); // Hoisted spills may cause dead code. if (!DeadDefs.empty()) { @@ -1351,9 +1343,9 @@ void InlineSpiller::spillAll() { } // Finally delete the SnippetCopies. - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + for (unsigned Reg : RegsToSpill) { for (MachineRegisterInfo::reg_instr_iterator - RI = MRI.reg_instr_begin(RegsToSpill[i]), E = MRI.reg_instr_end(); + RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) { MachineInstr *MI = &*(RI++); assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); @@ -1364,8 +1356,8 @@ void InlineSpiller::spillAll() { } // Delete all spilled registers. 
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - Edit->eraseVirtReg(RegsToSpill[i]); + for (unsigned Reg : RegsToSpill) + Edit->eraseVirtReg(Reg); } void InlineSpiller::spill(LiveRangeEdit &edit) { diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp index fd5749bfefa0..f8cc24724580 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp @@ -144,7 +144,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { PrevPos = Start; } - MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum); + MachineFunction::const_iterator MFI = + MF->getBlockNumbered(MBBNum)->getIterator(); BlockInterference *BI = &Blocks[MBBNum]; ArrayRef<SlotIndex> RegMaskSlots; ArrayRef<const uint32_t*> RegMaskBits; diff --git a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 53c8adc05d77..724f1d61abe2 100644 --- a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -52,7 +52,7 @@ using namespace llvm; static cl::opt<bool> LowerInterleavedAccesses( "lower-interleaved-accesses", cl::desc("Enable lowering interleaved accesses to intrinsics"), - cl::init(false), cl::Hidden); + cl::init(true), cl::Hidden); static unsigned MaxFactor; // The maximum supported interleave factor. @@ -271,7 +271,7 @@ bool InterleavedAccess::runOnFunction(Function &F) { SmallVector<Instruction *, 32> DeadInsts; bool Changed = false; - for (auto &I : inst_range(F)) { + for (auto &I : instructions(F)) { if (LoadInst *LI = dyn_cast<LoadInst>(&I)) Changed |= lowerInterleavedLoad(LI, DeadInsts); diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp index 2c95e9e7d0d3..2962f8701625 100644 --- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -35,24 +35,24 @@ static void EnsureFunctionExists(Module &M, const char *Name, M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false)); } -static void EnsureFPIntrinsicsExist(Module &M, Function *Fn, +static void EnsureFPIntrinsicsExist(Module &M, Function &Fn, const char *FName, const char *DName, const char *LDName) { // Insert definitions for all the floating point types. - switch((int)Fn->arg_begin()->getType()->getTypeID()) { + switch((int)Fn.arg_begin()->getType()->getTypeID()) { case Type::FloatTyID: - EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(), + EnsureFunctionExists(M, FName, Fn.arg_begin(), Fn.arg_end(), Type::getFloatTy(M.getContext())); break; case Type::DoubleTyID: - EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(), + EnsureFunctionExists(M, DName, Fn.arg_begin(), Fn.arg_end(), Type::getDoubleTy(M.getContext())); break; case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID: - EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(), - Fn->arg_begin()->getType()); + EnsureFunctionExists(M, LDName, Fn.arg_begin(), Fn.arg_end(), + Fn.arg_begin()->getType()); break; } } @@ -67,7 +67,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, Type *RetTy) { // If we haven't already looked up this function, check to see if the // program already contains a function with this name. - Module *M = CI->getParent()->getParent()->getParent(); + Module *M = CI->getModule(); // Get or insert the definition now. 
std::vector<Type *> ParamTys; for (ArgIt I = ArgBegin; I != ArgEnd; ++I) @@ -75,7 +75,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, Constant* FCache = M->getOrInsertFunction(NewFn, FunctionType::get(RetTy, ParamTys, false)); - IRBuilder<> Builder(CI->getParent(), CI); + IRBuilder<> Builder(CI->getParent(), CI->getIterator()); SmallVector<Value *, 8> Args(ArgBegin, ArgEnd); CallInst *NewCI = Builder.CreateCall(FCache, Args); NewCI->setName(CI->getName()); @@ -94,20 +94,20 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, void IntrinsicLowering::AddPrototypes(Module &M) { LLVMContext &Context = M.getContext(); - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (I->isDeclaration() && !I->use_empty()) - switch (I->getIntrinsicID()) { + for (auto &F : M) + if (F.isDeclaration() && !F.use_empty()) + switch (F.getIntrinsicID()) { default: break; case Intrinsic::setjmp: - EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(), + EnsureFunctionExists(M, "setjmp", F.arg_begin(), F.arg_end(), Type::getInt32Ty(M.getContext())); break; case Intrinsic::longjmp: - EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(), + EnsureFunctionExists(M, "longjmp", F.arg_begin(), F.arg_end(), Type::getVoidTy(M.getContext())); break; case Intrinsic::siglongjmp: - EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(), + EnsureFunctionExists(M, "abort", F.arg_end(), F.arg_end(), Type::getVoidTy(M.getContext())); break; case Intrinsic::memcpy: @@ -132,31 +132,31 @@ void IntrinsicLowering::AddPrototypes(Module &M) { DL.getIntPtrType(Context), nullptr); break; case Intrinsic::sqrt: - EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl"); + EnsureFPIntrinsicsExist(M, F, "sqrtf", "sqrt", "sqrtl"); break; case Intrinsic::sin: - EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl"); + EnsureFPIntrinsicsExist(M, F, "sinf", "sin", "sinl"); break; case Intrinsic::cos: - EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl"); + EnsureFPIntrinsicsExist(M, F, "cosf", "cos", "cosl"); break; case Intrinsic::pow: - EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl"); + EnsureFPIntrinsicsExist(M, F, "powf", "pow", "powl"); break; case Intrinsic::log: - EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl"); + EnsureFPIntrinsicsExist(M, F, "logf", "log", "logl"); break; case Intrinsic::log2: - EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l"); + EnsureFPIntrinsicsExist(M, F, "log2f", "log2", "log2l"); break; case Intrinsic::log10: - EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l"); + EnsureFPIntrinsicsExist(M, F, "log10f", "log10", "log10l"); break; case Intrinsic::exp: - EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl"); + EnsureFPIntrinsicsExist(M, F, "expf", "exp", "expl"); break; case Intrinsic::exp2: - EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l"); + EnsureFPIntrinsicsExist(M, F, "exp2f", "exp2", "exp2l"); break; } } @@ -167,8 +167,8 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!"); unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); - - IRBuilder<> Builder(IP->getParent(), IP); + + IRBuilder<> Builder(IP); switch(BitSize) { default: llvm_unreachable("Unhandled type size of value to byteswap!"); @@ -268,7 +268,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL }; - IRBuilder<> Builder(IP->getParent(), IP); + IRBuilder<> Builder(IP); 
unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); unsigned WordSize = (BitSize + 63) / 64; @@ -301,7 +301,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { /// instruction IP. static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) { - IRBuilder<> Builder(IP->getParent(), IP); + IRBuilder<> Builder(IP); unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); for (unsigned i = 1; i < BitSize; i <<= 1) { @@ -338,7 +338,7 @@ static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname, } void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { - IRBuilder<> Builder(CI->getParent(), CI); + IRBuilder<> Builder(CI); LLVMContext &Context = CI->getContext(); const Function *Callee = CI->getCalledFunction(); @@ -424,6 +424,13 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } + case Intrinsic::get_dynamic_area_offset: + errs() << "WARNING: this target does not support the custom llvm.get." + "dynamic.area.offset. It is being lowered to a constant 0\n"; + // Just lower it to a constant 0 because for most targets + // @llvm.get.dynamic.area.offset is lowered to zero. + CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 0)); + break; case Intrinsic::returnaddress: case Intrinsic::frameaddress: errs() << "WARNING: this target does not support the llvm." @@ -589,7 +596,7 @@ bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) { return false; // Okay, we can do this xform, do so now. - Module *M = CI->getParent()->getParent()->getParent(); + Module *M = CI->getModule(); Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty); Value *Op = CI->getArgOperand(0); diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 37299eb664cf..1c27377feee7 100644 --- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -82,7 +82,7 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, } TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo(BasicTTIImpl(this, F)); }); } @@ -125,9 +125,10 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, PM.add(new MachineFunctionAnalysis(*TM, MFInitializer)); // Enable FastISel with -fast, but allow that to be overridden. + TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE); if (EnableFastISelOption == cl::BOU_TRUE || (TM->getOptLevel() == CodeGenOpt::None && - EnableFastISelOption != cl::BOU_FALSE)) + TM->getO0WantsFastISel())) TM->setFastISel(true); // Ask the target for an isel. @@ -202,6 +203,7 @@ bool LLVMTargetMachine::addPassesToEmitFile( Triple T(getTargetTriple().str()); AsmStreamer.reset(getTarget().createMCObjectStreamer( T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + Options.MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ true)); break; } @@ -254,6 +256,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, const MCSubtargetInfo &STI = *getMCSubtargetInfo(); std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer( T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + Options.MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ true)); // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. 
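Aside: two hunks in this section lean on the same command-line pattern. The -global-merge-on-external option earlier in the GlobalMerge diff is changed from a plain cl::opt&lt;bool&gt; to the three-state cl::boolOrDefault, and the FastISel-at-O0 logic directly above consults a similar tri-state flag: an explicitly passed value always wins, and only when the flag is unset does a default chosen elsewhere (the target, or the caller of createGlobalMergePass) apply. A minimal sketch of that resolution pattern, using a local enum rather than LLVM's cl::opt machinery:

// Sketch of three-state flag resolution (hypothetical helper, not LLVM API).
#include <cstdio>

enum BoolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE };

static bool resolve(BoolOrDefault Flag, bool DefaultValue) {
  // An explicit true/false on the command line always wins; otherwise fall
  // back to the default supplied by the target or the pass's caller.
  return Flag == BOU_UNSET ? DefaultValue : (Flag == BOU_TRUE);
}

int main() {
  std::printf("%d %d %d\n",
              resolve(BOU_UNSET, true),   // 1: default applies
              resolve(BOU_FALSE, true),   // 0: user explicitly disabled
              resolve(BOU_TRUE, false));  // 1: user explicitly enabled
  return 0;
}

The extra UNSET state is what lets createGlobalMergePass honor its new MergeExternalByDefault argument without the option's own initializer silently overriding it, which a two-state cl::opt&lt;bool&gt; with cl::init(false) could not express.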
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp new file mode 100644 index 000000000000..98d30b95dd2d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -0,0 +1,405 @@ +//===------ LiveDebugValues.cpp - Tracking Debug Value MIs ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// This pass implements a data flow analysis that propagates debug location +/// information by inserting additional DBG_VALUE instructions into the machine +/// instruction stream. The pass internally builds debug location liveness +/// ranges to determine the points where additional DBG_VALUEs need to be +/// inserted. +/// +/// This is a separate pass from DbgValueHistoryCalculator to facilitate +/// testing and improve modularity. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <deque> +#include <list> + +using namespace llvm; + +#define DEBUG_TYPE "live-debug-values" + +STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); + +namespace { + +class LiveDebugValues : public MachineFunctionPass { + +private: + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + typedef std::pair<const DILocalVariable *, const DILocation *> + InlinedVariable; + + /// A potentially inlined instance of a variable. + struct DebugVariable { + const DILocalVariable *Var; + const DILocation *InlinedAt; + + DebugVariable(const DILocalVariable *_var, const DILocation *_inlinedAt) + : Var(_var), InlinedAt(_inlinedAt) {} + + bool operator==(const DebugVariable &DV) const { + return (Var == DV.Var) && (InlinedAt == DV.InlinedAt); + } + }; + + /// Member variables and functions for Range Extension across basic blocks. + struct VarLoc { + DebugVariable Var; + const MachineInstr *MI; // MachineInstr should be a DBG_VALUE instr. + + VarLoc(DebugVariable _var, const MachineInstr *_mi) : Var(_var), MI(_mi) {} + + bool operator==(const VarLoc &V) const; + }; + + typedef std::list<VarLoc> VarLocList; + typedef SmallDenseMap<const MachineBasicBlock *, VarLocList> VarLocInMBB; + + bool OLChanged; // OutgoingLocs got changed for this bb. + bool MBBJoined; // The MBB was joined. + + void transferDebugValue(MachineInstr &MI, VarLocList &OpenRanges); + void transferRegisterDef(MachineInstr &MI, VarLocList &OpenRanges); + void transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges, + VarLocInMBB &OutLocs); + void transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs); + + void join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs); + + bool ExtendRanges(MachineFunction &MF); + +public: + static char ID; + + /// Default construct and initialize the pass. + LiveDebugValues(); + + /// Tell the pass manager which passes we depend on and what + /// information we preserve. 
+ void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// Print to ostream with a message. + void printVarLocInMBB(const VarLocInMBB &V, const char *msg, + raw_ostream &Out) const; + + /// Calculate the liveness information for the given machine function. + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // namespace + +//===----------------------------------------------------------------------===// +// Implementation +//===----------------------------------------------------------------------===// + +char LiveDebugValues::ID = 0; +char &llvm::LiveDebugValuesID = LiveDebugValues::ID; +INITIALIZE_PASS(LiveDebugValues, "livedebugvalues", "Live DEBUG_VALUE analysis", + false, false) + +/// Default construct and initialize the pass. +LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) { + initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry()); +} + +/// Tell the pass manager which passes we depend on and what information we +/// preserve. +void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); +} + +// \brief If @MI is a DBG_VALUE with debug value described by a defined +// register, returns the number of this register. In the other case, returns 0. +static unsigned isDescribedByReg(const MachineInstr &MI) { + assert(MI.isDebugValue()); + assert(MI.getNumOperands() == 4); + // If location of variable is described using a register (directly or + // indirecltly), this register is always a first operand. + return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0; +} + +// \brief This function takes two DBG_VALUE instructions and returns true +// if their offsets are equal; otherwise returns false. +static bool areOffsetsEqual(const MachineInstr &MI1, const MachineInstr &MI2) { + assert(MI1.isDebugValue()); + assert(MI1.getNumOperands() == 4); + + assert(MI2.isDebugValue()); + assert(MI2.getNumOperands() == 4); + + if (!MI1.isIndirectDebugValue() && !MI2.isIndirectDebugValue()) + return true; + + // Check if both MIs are indirect and they are equal. + if (MI1.isIndirectDebugValue() && MI2.isIndirectDebugValue()) + return MI1.getOperand(1).getImm() == MI2.getOperand(1).getImm(); + + return false; +} + +//===----------------------------------------------------------------------===// +// Debug Range Extension Implementation +//===----------------------------------------------------------------------===// + +void LiveDebugValues::printVarLocInMBB(const VarLocInMBB &V, const char *msg, + raw_ostream &Out) const { + Out << "Printing " << msg << ":\n"; + for (const auto &L : V) { + Out << "MBB: " << L.first->getName() << ":\n"; + for (const auto &VLL : L.second) { + Out << " Var: " << VLL.Var.Var->getName(); + Out << " MI: "; + (*VLL.MI).dump(); + Out << "\n"; + } + } + Out << "\n"; +} + +bool LiveDebugValues::VarLoc::operator==(const VarLoc &V) const { + return (Var == V.Var) && (isDescribedByReg(*MI) == isDescribedByReg(*V.MI)) && + (areOffsetsEqual(*MI, *V.MI)); +} + +/// End all previous ranges related to @MI and start a new range from @MI +/// if it is a DBG_VALUE instr. +void LiveDebugValues::transferDebugValue(MachineInstr &MI, + VarLocList &OpenRanges) { + if (!MI.isDebugValue()) + return; + const DILocalVariable *RawVar = MI.getDebugVariable(); + assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) && + "Expected inlined-at fields to agree"); + DebugVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt()); + + // End all previous ranges of Var. 
+ OpenRanges.erase( + std::remove_if(OpenRanges.begin(), OpenRanges.end(), + [&](const VarLoc &V) { return (Var == V.Var); }), + OpenRanges.end()); + + // Add Var to OpenRanges from this DBG_VALUE. + // TODO: Currently handles DBG_VALUE which has only reg as location. + if (isDescribedByReg(MI)) { + VarLoc V(Var, &MI); + OpenRanges.push_back(std::move(V)); + } +} + +/// A definition of a register may mark the end of a range. +void LiveDebugValues::transferRegisterDef(MachineInstr &MI, + VarLocList &OpenRanges) { + for (const MachineOperand &MO : MI.operands()) { + if (!(MO.isReg() && MO.isDef() && MO.getReg() && + TRI->isPhysicalRegister(MO.getReg()))) + continue; + // Remove ranges of all aliased registers. + for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) + OpenRanges.erase(std::remove_if(OpenRanges.begin(), OpenRanges.end(), + [&](const VarLoc &V) { + return (*RAI == + isDescribedByReg(*V.MI)); + }), + OpenRanges.end()); + } +} + +/// Terminate all open ranges at the end of the current basic block. +void LiveDebugValues::transferTerminatorInst(MachineInstr &MI, + VarLocList &OpenRanges, + VarLocInMBB &OutLocs) { + const MachineBasicBlock *CurMBB = MI.getParent(); + if (!(MI.isTerminator() || (&MI == &CurMBB->instr_back()))) + return; + + if (OpenRanges.empty()) + return; + + if (OutLocs.find(CurMBB) == OutLocs.end()) { + // Create space for new Outgoing locs entries. + VarLocList VLL; + OutLocs.insert(std::make_pair(CurMBB, std::move(VLL))); + } + auto OL = OutLocs.find(CurMBB); + assert(OL != OutLocs.end()); + VarLocList &VLL = OL->second; + + for (auto OR : OpenRanges) { + // Copy OpenRanges to OutLocs, if not already present. + assert(OR.MI->isDebugValue()); + DEBUG(dbgs() << "Add to OutLocs: "; OR.MI->dump();); + if (std::find_if(VLL.begin(), VLL.end(), + [&](const VarLoc &V) { return (OR == V); }) == VLL.end()) { + VLL.push_back(std::move(OR)); + OLChanged = true; + } + } + OpenRanges.clear(); +} + +/// This routine creates OpenRanges and OutLocs. +void LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges, + VarLocInMBB &OutLocs) { + transferDebugValue(MI, OpenRanges); + transferRegisterDef(MI, OpenRanges); + transferTerminatorInst(MI, OpenRanges, OutLocs); +} + +/// This routine joins the analysis results of all incoming edges in @MBB by +/// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same +/// source variable in all the predecessors of @MBB reside in the same location. +void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, + VarLocInMBB &InLocs) { + DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n"); + + MBBJoined = false; + + VarLocList InLocsT; // Temporary incoming locations. + + // For all predecessors of this MBB, find the set of VarLocs that can be + // joined. + for (auto p : MBB.predecessors()) { + auto OL = OutLocs.find(p); + // Join is null in case of empty OutLocs from any of the pred. + if (OL == OutLocs.end()) + return; + + // Just copy over the Out locs to incoming locs for the first predecessor. + if (p == *MBB.pred_begin()) { + InLocsT = OL->second; + continue; + } + + // Join with this predecessor. 
+ VarLocList &VLL = OL->second; + InLocsT.erase(std::remove_if(InLocsT.begin(), InLocsT.end(), + [&](VarLoc &ILT) { + return (std::find_if(VLL.begin(), VLL.end(), + [&](const VarLoc &V) { + return (ILT == V); + }) == VLL.end()); + }), + InLocsT.end()); + } + + if (InLocsT.empty()) + return; + + if (InLocs.find(&MBB) == InLocs.end()) { + // Create space for new Incoming locs entries. + VarLocList VLL; + InLocs.insert(std::make_pair(&MBB, std::move(VLL))); + } + auto IL = InLocs.find(&MBB); + assert(IL != InLocs.end()); + VarLocList &ILL = IL->second; + + // Insert DBG_VALUE instructions, if not already inserted. + for (auto ILT : InLocsT) { + if (std::find_if(ILL.begin(), ILL.end(), [&](const VarLoc &I) { + return (ILT == I); + }) == ILL.end()) { + // This VarLoc is not found in InLocs i.e. it is not yet inserted. So, a + // new range is started for the var from the mbb's beginning by inserting + // a new DBG_VALUE. transfer() will end this range however appropriate. + const MachineInstr *DMI = ILT.MI; + MachineInstr *MI = + BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(), + DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(), 0, + DMI->getDebugVariable(), DMI->getDebugExpression()); + if (DMI->isIndirectDebugValue()) + MI->getOperand(1).setImm(DMI->getOperand(1).getImm()); + DEBUG(dbgs() << "Inserted: "; MI->dump();); + ++NumInserted; + MBBJoined = true; // rerun transfer(). + + VarLoc V(ILT.Var, MI); + ILL.push_back(std::move(V)); + } + } +} + +/// Calculate the liveness information for the given machine function and +/// extend ranges across basic blocks. +bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { + + DEBUG(dbgs() << "\nDebug Range Extension\n"); + + bool Changed = false; + OLChanged = MBBJoined = false; + + VarLocList OpenRanges; // Ranges that are open until end of bb. + VarLocInMBB OutLocs; // Ranges that exist beyond bb. + VarLocInMBB InLocs; // Ranges that are incoming after joining. + + std::deque<MachineBasicBlock *> BBWorklist; + + // Initialize every mbb with OutLocs. + for (auto &MBB : MF) + for (auto &MI : MBB) + transfer(MI, OpenRanges, OutLocs); + DEBUG(printVarLocInMBB(OutLocs, "OutLocs after initialization", dbgs())); + + // Construct a worklist of MBBs. + for (auto &MBB : MF) + BBWorklist.push_back(&MBB); + + // Perform join() and transfer() using the worklist until the ranges converge + // Ranges have converged when the worklist is empty. + while (!BBWorklist.empty()) { + MachineBasicBlock *MBB = BBWorklist.front(); + BBWorklist.pop_front(); + + join(*MBB, OutLocs, InLocs); + + if (MBBJoined) { + Changed = true; + for (auto &MI : *MBB) + transfer(MI, OpenRanges, OutLocs); + DEBUG(printVarLocInMBB(OutLocs, "OutLocs after propagating", dbgs())); + DEBUG(printVarLocInMBB(InLocs, "InLocs after propagating", dbgs())); + + if (OLChanged) { + OLChanged = false; + for (auto s : MBB->successors()) + if (std::find(BBWorklist.begin(), BBWorklist.end(), s) == + BBWorklist.end()) // add if not already present. 
+ BBWorklist.push_back(s); + } + } + } + DEBUG(printVarLocInMBB(OutLocs, "Final OutLocs", dbgs())); + DEBUG(printVarLocInMBB(InLocs, "Final InLocs", dbgs())); + return Changed; +} + +bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + + bool Changed = false; + + Changed |= ExtendRanges(MF); + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp index 15715513452d..6dac7dbd15bf 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -91,9 +91,7 @@ public: bool dominates(MachineBasicBlock *MBB) { if (LBlocks.empty()) LS.getMachineBasicBlocks(DL, LBlocks); - if (LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB)) - return true; - return false; + return LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB); } }; } // end anonymous namespace @@ -512,7 +510,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { bool Changed = false; for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE; ++MFI) { - MachineBasicBlock *MBB = MFI; + MachineBasicBlock *MBB = &*MFI; for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); MBBI != MBBE;) { if (!MBBI->isDebugValue()) { @@ -536,65 +534,49 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { return Changed; } -void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, - LiveRange *LR, const VNInfo *VNI, - SmallVectorImpl<SlotIndex> *Kills, +/// We only propagate DBG_VALUES locally here. LiveDebugValues performs a +/// data-flow analysis to propagate them beyond basic block boundaries. +void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR, + const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS) { - SmallVector<SlotIndex, 16> Todo; - Todo.push_back(Idx); - do { - SlotIndex Start = Todo.pop_back_val(); - MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start); - SlotIndex Stop = LIS.getMBBEndIdx(MBB); - LocMap::iterator I = locInts.find(Start); - - // Limit to VNI's live range. - bool ToEnd = true; - if (LR && VNI) { - LiveInterval::Segment *Segment = LR->getSegmentContaining(Start); - if (!Segment || Segment->valno != VNI) { - if (Kills) - Kills->push_back(Start); - continue; - } - if (Segment->end < Stop) - Stop = Segment->end, ToEnd = false; - } - - // There could already be a short def at Start. - if (I.valid() && I.start() <= Start) { - // Stop when meeting a different location or an already extended interval. - Start = Start.getNextSlot(); - if (I.value() != LocNo || I.stop() != Start) - continue; - // This is a one-slot placeholder. Just skip it. - ++I; + SlotIndex Start = Idx; + MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start); + SlotIndex Stop = LIS.getMBBEndIdx(MBB); + LocMap::iterator I = locInts.find(Start); + + // Limit to VNI's live range. + bool ToEnd = true; + if (LR && VNI) { + LiveInterval::Segment *Segment = LR->getSegmentContaining(Start); + if (!Segment || Segment->valno != VNI) { + if (Kills) + Kills->push_back(Start); + return; } + if (Segment->end < Stop) + Stop = Segment->end, ToEnd = false; + } - // Limited by the next def. - if (I.valid() && I.start() < Stop) - Stop = I.start(), ToEnd = false; - // Limited by VNI's live range. - else if (!ToEnd && Kills) - Kills->push_back(Stop); + // There could already be a short def at Start. 
+ if (I.valid() && I.start() <= Start) { + // Stop when meeting a different location or an already extended interval. + Start = Start.getNextSlot(); + if (I.value() != LocNo || I.stop() != Start) + return; + // This is a one-slot placeholder. Just skip it. + ++I; + } - if (Start >= Stop) - continue; + // Limited by the next def. + if (I.valid() && I.start() < Stop) + Stop = I.start(), ToEnd = false; + // Limited by VNI's live range. + else if (!ToEnd && Kills) + Kills->push_back(Stop); + if (Start < Stop) I.insert(Start, Stop, LocNo); - - // If we extended to the MBB end, propagate down the dominator tree. - if (!ToEnd) - continue; - const std::vector<MachineDomTreeNode*> &Children = - MDT.getNode(MBB)->getChildren(); - for (unsigned i = 0, e = Children.size(); i != e; ++i) { - MachineBasicBlock *MBB = Children[i]->getBlock(); - if (UVS.dominates(MBB)) - Todo.push_back(LIS.getMBBStartIdx(MBB)); - } - } while (!Todo.empty()); } void @@ -763,7 +745,7 @@ static void removeDebugValues(MachineFunction &mf) { bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { if (!EnableLDV) return false; - if (!FunctionDIs.count(mf.getFunction())) { + if (!mf.getFunction()->getSubprogram()) { removeDebugValues(mf); return false; } @@ -1004,11 +986,11 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, SlotIndex Stop = I.stop(); unsigned LocNo = I.value(); DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo); - MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); - SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB); + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator(); + SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB); DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); - insertDebugValue(MBB, Start, LocNo, LIS, TII); + insertDebugValue(&*MBB, Start, LocNo, LIS, TII); // This interval may span multiple basic blocks. // Insert a DBG_VALUE into each one. 
while(Stop > MBBEnd) { @@ -1016,9 +998,9 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, Start = MBBEnd; if (++MBB == MFEnd) break; - MBBEnd = LIS.getMBBEndIdx(MBB); + MBBEnd = LIS.getMBBEndIdx(&*MBB); DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); - insertDebugValue(MBB, Start, LocNo, LIS, TII); + insertDebugValue(&*MBB, Start, LocNo, LIS, TII); } DEBUG(dbgs() << '\n'); if (MBB == MFEnd) @@ -1047,7 +1029,6 @@ void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { } bool LiveDebugVariables::doInitialization(Module &M) { - FunctionDIs = makeSubprogramMap(M); return Pass::doInitialization(M); } diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h index 694aa1770c9c..3d36f4d2494a 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h @@ -33,7 +33,6 @@ class VirtRegMap; class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass { void *pImpl; - DenseMap<const Function *, DISubprogram *> FunctionDIs; public: static char ID; // Pass identification, replacement for typeid diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index d75e4417cb03..efad36ffa3f1 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -26,7 +26,6 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> @@ -865,7 +864,7 @@ void LiveInterval::constructMainRangeFromSubranges( // - If any of the subranges is live at a point the main liverange has to be // live too, conversily if no subrange is live the main range mustn't be // live either. - // We do this by scannig through all the subranges simultaneously creating new + // We do this by scanning through all the subranges simultaneously creating new // segments in the main range as segments start/ends come up in the subranges. assert(hasSubRanges() && "expected subranges to be present"); assert(segments.empty() && valnos.empty() && "expected empty main range"); @@ -889,7 +888,7 @@ void LiveInterval::constructMainRangeFromSubranges( Segment CurrentSegment; bool ConstructingSegment = false; bool NeedVNIFixup = false; - unsigned ActiveMask = 0; + LaneBitmask ActiveMask = 0; SlotIndex Pos = First; while (true) { SlotIndex NextPos = Last; @@ -899,7 +898,7 @@ void LiveInterval::constructMainRangeFromSubranges( END_SEGMENT, } Event = NOTHING; // Which subregister lanes are affected by the current event. - unsigned EventMask = 0; + LaneBitmask EventMask = 0; // Whether a BEGIN_SEGMENT is also a valno definition point. bool IsDef = false; // Find the next begin or end of a subrange segment. Combine masks if we @@ -1066,7 +1065,7 @@ void LiveInterval::print(raw_ostream &OS) const { super::print(OS); // Print subranges for (const SubRange &SR : subranges()) { - OS << format(" L%04X ", SR.LaneMask) << SR; + OS << " L" << PrintLaneMask(SR.LaneMask) << ' ' << SR; } } @@ -1101,8 +1100,8 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const { super::verify(); // Make sure SubRanges are fine and LaneMasks are disjunct. - unsigned Mask = 0; - unsigned MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u; + LaneBitmask Mask = 0; + LaneBitmask MaxMask = MRI != nullptr ? 
MRI->getMaxLaneMaskForVReg(reg) : ~0u; for (const SubRange &SR : subranges()) { // Subrange lanemask should be disjunct to any previous subrange masks. assert((Mask & SR.LaneMask) == 0); @@ -1110,6 +1109,8 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const { // subrange mask should not contained in maximum lane mask for the vreg. assert((Mask & ~MaxMask) == 0); + // empty subranges must be removed. + assert(!SR.empty()); SR.verify(); // Main liverange should cover subrange. @@ -1370,11 +1371,42 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { return EqClass.getNumClasses(); } -void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], - MachineRegisterInfo &MRI) { - assert(LIV[0] && "LIV[0] must be set"); - LiveInterval &LI = *LIV[0]; +template<typename LiveRangeT, typename EqClassesT> +static void DistributeRange(LiveRangeT &LR, LiveRangeT *SplitLRs[], + EqClassesT VNIClasses) { + // Move segments to new intervals. + LiveRange::iterator J = LR.begin(), E = LR.end(); + while (J != E && VNIClasses[J->valno->id] == 0) + ++J; + for (LiveRange::iterator I = J; I != E; ++I) { + if (unsigned eq = VNIClasses[I->valno->id]) { + assert((SplitLRs[eq-1]->empty() || SplitLRs[eq-1]->expiredAt(I->start)) && + "New intervals should be empty"); + SplitLRs[eq-1]->segments.push_back(*I); + } else + *J++ = *I; + } + LR.segments.erase(J, E); + + // Transfer VNInfos to their new owners and renumber them. + unsigned j = 0, e = LR.getNumValNums(); + while (j != e && VNIClasses[j] == 0) + ++j; + for (unsigned i = j; i != e; ++i) { + VNInfo *VNI = LR.getValNumInfo(i); + if (unsigned eq = VNIClasses[i]) { + VNI->id = SplitLRs[eq-1]->getNumValNums(); + SplitLRs[eq-1]->valnos.push_back(VNI); + } else { + VNI->id = j; + LR.valnos[j++] = VNI; + } + } + LR.valnos.resize(j); +} +void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[], + MachineRegisterInfo &MRI) { // Rewrite instructions. for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg), RE = MRI.reg_end(); RI != RE;) { @@ -1396,38 +1428,41 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], // NULL. If the use is tied to a def, VNI will be the defined value. if (!VNI) continue; - MO.setReg(LIV[getEqClass(VNI)]->reg); - } - - // Move runs to new intervals. - LiveInterval::iterator J = LI.begin(), E = LI.end(); - while (J != E && EqClass[J->valno->id] == 0) - ++J; - for (LiveInterval::iterator I = J; I != E; ++I) { - if (unsigned eq = EqClass[I->valno->id]) { - assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) && - "New intervals should be empty"); - LIV[eq]->segments.push_back(*I); - } else - *J++ = *I; + if (unsigned EqClass = getEqClass(VNI)) + MO.setReg(LIV[EqClass-1]->reg); } - // TODO: do not cheat anymore by simply cleaning all subranges - LI.clearSubRanges(); - LI.segments.erase(J, E); - // Transfer VNInfos to their new owners and renumber them. - unsigned j = 0, e = LI.getNumValNums(); - while (j != e && EqClass[j] == 0) - ++j; - for (unsigned i = j; i != e; ++i) { - VNInfo *VNI = LI.getValNumInfo(i); - if (unsigned eq = EqClass[i]) { - VNI->id = LIV[eq]->getNumValNums(); - LIV[eq]->valnos.push_back(VNI); - } else { - VNI->id = j; - LI.valnos[j++] = VNI; + // Distribute subregister liveranges. 
+ if (LI.hasSubRanges()) { + unsigned NumComponents = EqClass.getNumClasses(); + SmallVector<unsigned, 8> VNIMapping; + SmallVector<LiveInterval::SubRange*, 8> SubRanges; + BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator(); + for (LiveInterval::SubRange &SR : LI.subranges()) { + // Create new subranges in the split intervals and construct a mapping + // for the VNInfos in the subrange. + unsigned NumValNos = SR.valnos.size(); + VNIMapping.clear(); + VNIMapping.reserve(NumValNos); + SubRanges.clear(); + SubRanges.resize(NumComponents-1, nullptr); + for (unsigned I = 0; I < NumValNos; ++I) { + const VNInfo &VNI = *SR.valnos[I]; + const VNInfo *MainRangeVNI = LI.getVNInfoAt(VNI.def); + assert(MainRangeVNI != nullptr + && "SubRange def must have corresponding main range def"); + unsigned ComponentNum = getEqClass(MainRangeVNI); + VNIMapping.push_back(ComponentNum); + if (ComponentNum > 0 && SubRanges[ComponentNum-1] == nullptr) { + SubRanges[ComponentNum-1] + = LIV[ComponentNum-1]->createSubRange(Allocator, SR.LaneMask); + } + } + DistributeRange(SR, SubRanges.data(), VNIMapping); } + LI.removeEmptySubRanges(); } - LI.valnos.resize(j); + + // Distribute main liverange. + DistributeRange(LI, LIV, EqClass); } diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp index c00b010e763b..9451d92bd7ae 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -32,7 +32,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -48,7 +47,7 @@ char LiveIntervals::ID = 0; char &llvm::LiveIntervalsID = LiveIntervals::ID; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveVariables) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) @@ -76,8 +75,8 @@ cl::opt<bool> UseSegmentSetForPhysRegs( void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired<AliasAnalysis>(); - AU.addPreserved<AliasAnalysis>(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); // LiveVariables isn't really required by this analysis, it is only required // here to make sure it is live during TwoAddressInstructionPass and // PHIElimination. This is temporary. 
@@ -124,7 +123,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { MRI = &MF->getRegInfo(); TRI = MF->getSubtarget().getRegisterInfo(); TII = MF->getSubtarget().getInstrInfo(); - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); Indexes = &getAnalysis<SlotIndexes>(); DomTree = &getAnalysis<MachineDominatorTree>(); @@ -198,9 +197,16 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) { void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { assert(LRCalc && "LRCalc not initialized."); assert(LI.empty() && "Should only compute empty intervals."); + bool ShouldTrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(LI.reg); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); - LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg)); - computeDeadValues(LI, nullptr); + LRCalc->calculate(LI, ShouldTrackSubRegLiveness); + bool SeparatedComponents = computeDeadValues(LI, nullptr); + if (SeparatedComponents) { + assert(ShouldTrackSubRegLiveness + && "Separated components should only occur for unused subreg defs"); + SmallVector<LiveInterval*, 8> SplitLIs; + splitSeparateComponents(LI, SplitLIs); + } } void LiveIntervals::computeVirtRegs() { @@ -216,19 +222,31 @@ void LiveIntervals::computeRegMasks() { RegMaskBlocks.resize(MF->getNumBlockIDs()); // Find all instructions with regmask operands. - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); - MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = MBBI; - std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()]; + for (MachineBasicBlock &MBB : *MF) { + std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB.getNumber()]; RMB.first = RegMaskSlots.size(); - for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end(); - MI != ME; ++MI) - for (const MachineOperand &MO : MI->operands()) { + + // Some block starts, such as EH funclets, create masks. + if (const uint32_t *Mask = MBB.getBeginClobberMask(TRI)) { + RegMaskSlots.push_back(Indexes->getMBBStartIdx(&MBB)); + RegMaskBits.push_back(Mask); + } + + for (MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.operands()) { if (!MO.isRegMask()) continue; - RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot()); - RegMaskBits.push_back(MO.getRegMask()); + RegMaskSlots.push_back(Indexes->getInstructionIndex(&MI).getRegSlot()); + RegMaskBits.push_back(MO.getRegMask()); } + } + + // Some block ends, such as funclet returns, create masks. + if (const uint32_t *Mask = MBB.getEndClobberMask(TRI)) { + RegMaskSlots.push_back(Indexes->getMBBEndIdx(&MBB)); + RegMaskBits.push_back(Mask); + } + // Compute the number of register mask instructions in this block. RMB.second = RegMaskSlots.size() - RMB.first; } @@ -296,18 +314,17 @@ void LiveIntervals::computeLiveInRegUnits() { // Check all basic blocks for live-ins. for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) { - const MachineBasicBlock *MBB = MFI; + const MachineBasicBlock *MBB = &*MFI; // We only care about ABI blocks: Entry + landing pads. - if ((MFI != MF->begin() && !MBB->isLandingPad()) || MBB->livein_empty()) + if ((MFI != MF->begin() && !MBB->isEHPad()) || MBB->livein_empty()) continue; // Create phi-defs at Begin for all live-in registers. 
SlotIndex Begin = Indexes->getMBBStartIdx(MBB); DEBUG(dbgs() << Begin << "\tBB#" << MBB->getNumber()); - for (MachineBasicBlock::livein_iterator LII = MBB->livein_begin(), - LIE = MBB->livein_end(); LII != LIE; ++LII) { - for (MCRegUnitIterator Units(*LII, TRI); Units.isValid(); ++Units) { + for (const auto &LI : MBB->liveins()) { + for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) { unsigned Unit = *Units; LiveRange *LR = RegUnitRanges[Unit]; if (!LR) { @@ -396,9 +413,6 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes, } } -/// shrinkToUses - After removing some uses of a register, shrink its live -/// range to just the remaining uses. This method does not compute reaching -/// defs for new uses, and it doesn't remove dead defs. bool LiveIntervals::shrinkToUses(LiveInterval *li, SmallVectorImpl<MachineInstr*> *dead) { DEBUG(dbgs() << "Shrink: " << *li << '\n'); @@ -406,9 +420,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, && "Can only shrink virtual registers"); // Shrink subregister live ranges. + bool NeedsCleanup = false; for (LiveInterval::SubRange &S : li->subranges()) { shrinkToUses(S, li->reg); + if (S.empty()) + NeedsCleanup = true; } + if (NeedsCleanup) + li->removeEmptySubRanges(); // Find all the values used, including PHI kills. ShrinkToUsesWorkList WorkList; @@ -456,7 +475,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, bool LiveIntervals::computeDeadValues(LiveInterval &LI, SmallVectorImpl<MachineInstr*> *dead) { - bool PHIRemoved = false; + bool MayHaveSplitComponents = false; for (auto VNI : LI.valnos) { if (VNI->isUnused()) continue; @@ -466,10 +485,13 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, // Is the register live before? Otherwise we may have to add a read-undef // flag for subregister defs. - if (MRI->shouldTrackSubRegLiveness(LI.reg)) { + bool DeadBeforeDef = false; + unsigned VReg = LI.reg; + if (MRI->shouldTrackSubRegLiveness(VReg)) { if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) { MachineInstr *MI = getInstructionFromIndex(Def); - MI->addRegisterDefReadUndef(LI.reg); + MI->setRegisterDefReadUndef(VReg); + DeadBeforeDef = true; } } @@ -480,19 +502,27 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, VNI->markUnused(); LI.removeSegment(I); DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n"); - PHIRemoved = true; + MayHaveSplitComponents = true; } else { // This is a dead def. Make sure the instruction knows. MachineInstr *MI = getInstructionFromIndex(Def); assert(MI && "No instruction defining live value"); - MI->addRegisterDead(LI.reg, TRI); + MI->addRegisterDead(VReg, TRI); + + // If we have a dead def that is completely separate from the rest of + // the liverange then we rewrite it to use a different VReg to not violate + // the rule that the liveness of a virtual register forms a connected + // component. This should only happen if subregister liveness is tracked. + if (DeadBeforeDef) + MayHaveSplitComponents = true; + if (dead && MI->allDefsAreDead()) { DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI); dead->push_back(MI); } } } - return PHIRemoved; + return MayHaveSplitComponents; } void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) @@ -512,8 +542,8 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) // Maybe the operand is for a subregister we don't care about. 
unsigned SubReg = MO.getSubReg(); if (SubReg != 0) { - unsigned SubRegMask = TRI->getSubRegIndexLaneMask(SubReg); - if ((SubRegMask & SR.LaneMask) == 0) + LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg); + if ((LaneMask & SR.LaneMask) == 0) continue; } // We only need to visit each instruction once. @@ -712,7 +742,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // assign R0L to %vreg1, and R0 to %vreg2 because the low 32bits of R0 // are actually never written by %vreg2. After assignment the <kill> // flag at the read instruction is invalid. - unsigned DefinedLanesMask; + LaneBitmask DefinedLanesMask; if (!SRs.empty()) { // Compute a mask of lanes that are defined. DefinedLanesMask = 0; @@ -736,7 +766,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { continue; if (MO.isUse()) { // Reading any undefined lanes? - unsigned UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); + LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); if ((UseMask & ~DefinedLanesMask) != 0) goto CancelKill; } else if (MO.getSubReg() == 0) { @@ -944,7 +974,7 @@ public: LiveInterval &LI = LIS.getInterval(Reg); if (LI.hasSubRanges()) { unsigned SubReg = MO.getSubReg(); - unsigned LaneMask = TRI.getSubRegIndexLaneMask(SubReg); + LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg); for (LiveInterval::SubRange &S : LI.subranges()) { if ((S.LaneMask & LaneMask) == 0) continue; @@ -968,7 +998,7 @@ public: private: /// Update a single live range, assuming an instruction has been moved from /// OldIdx to NewIdx. - void updateRange(LiveRange &LR, unsigned Reg, unsigned LaneMask) { + void updateRange(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) { if (!Updated.insert(&LR).second) return; DEBUG({ @@ -976,7 +1006,7 @@ private: if (TargetRegisterInfo::isVirtualRegister(Reg)) { dbgs() << PrintReg(Reg); if (LaneMask != 0) - dbgs() << format(" L%04X", LaneMask); + dbgs() << " L" << PrintLaneMask(LaneMask); } else { dbgs() << PrintRegUnit(Reg, &TRI); } @@ -1098,7 +1128,7 @@ private: /// Hoist kill to NewIdx, then scan for last kill between NewIdx and /// OldIdx. /// - void handleMoveUp(LiveRange &LR, unsigned Reg, unsigned LaneMask) { + void handleMoveUp(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) { // First look for a kill at OldIdx. LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); LiveRange::iterator E = LR.end(); @@ -1175,7 +1205,7 @@ private: } // Return the last use of reg between NewIdx and OldIdx. 
- SlotIndex findLastUseBefore(unsigned Reg, unsigned LaneMask) { + SlotIndex findLastUseBefore(unsigned Reg, LaneBitmask LaneMask) { if (TargetRegisterInfo::isVirtualRegister(Reg)) { SlotIndex LastUse = NewIdx; @@ -1255,7 +1285,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, const MachineBasicBlock::iterator End, const SlotIndex endIdx, LiveRange &LR, const unsigned Reg, - const unsigned LaneMask) { + LaneBitmask LaneMask) { LiveInterval::iterator LII = LR.find(endIdx); SlotIndex lastUseIdx; if (LII != LR.end() && LII->start < endIdx) @@ -1282,7 +1312,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, continue; unsigned SubReg = MO.getSubReg(); - unsigned Mask = TRI->getSubRegIndexLaneMask(SubReg); + LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg); if ((Mask & LaneMask) == 0) continue; @@ -1412,3 +1442,20 @@ void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) { } LI.removeEmptySubRanges(); } + +void LiveIntervals::splitSeparateComponents(LiveInterval &LI, + SmallVectorImpl<LiveInterval*> &SplitLIs) { + ConnectedVNInfoEqClasses ConEQ(*this); + unsigned NumComp = ConEQ.Classify(&LI); + if (NumComp <= 1) + return; + DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n'); + unsigned Reg = LI.reg; + const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); + for (unsigned I = 1; I < NumComp; ++I) { + unsigned NewVReg = MRI->createVirtualRegister(RegClass); + LiveInterval &NewLI = createEmptyInterval(NewVReg); + SplitLIs.push_back(&NewLI); + } + ConEQ.Distribute(LI, SplitLIs.data(), *MRI); +} diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp index cbd98e3f3450..efbbcbe23e15 100644 --- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -68,7 +68,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) { /// Simulates liveness when stepping forward over an instruction(bundle): Remove /// killed-uses, add defs. This is the not recommended way, because it depends -/// on accurate kill flags. If possible use stepBackwards() instead of this +/// on accurate kill flags. If possible use stepBackward() instead of this /// function. void LivePhysRegs::stepForward(const MachineInstr &MI, SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers) { @@ -128,8 +128,8 @@ void LivePhysRegs::dump() const { /// Add live-in registers of basic block \p MBB to \p LiveRegs. static void addLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) { - for (unsigned Reg : make_range(MBB.livein_begin(), MBB.livein_end())) - LiveRegs.addReg(Reg); + for (const auto &LI : MBB.liveins()) + LiveRegs.addReg(LI.PhysReg); } /// Add pristine registers to the given \p LiveRegs. This function removes @@ -147,11 +147,19 @@ static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF, } void LivePhysRegs::addLiveOuts(const MachineBasicBlock *MBB, - bool AddPristines) { - if (AddPristines) { + bool AddPristinesAndCSRs) { + if (AddPristinesAndCSRs) { const MachineFunction &MF = *MBB->getParent(); addPristines(*this, MF, *TRI); + if (!MBB->isReturnBlock()) { + // The return block has no successors whose live-ins we could merge + // below. So instead we add the callee saved registers manually. + for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) + addReg(*I); + } } + + // To get the live-outs we simply merge the live-ins of all successors. 
for (const MachineBasicBlock *Succ : MBB->successors()) ::addLiveIns(*this, *Succ); } diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp index bb2877ae31a8..c408615d42e2 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -64,23 +64,23 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { unsigned SubReg = MO.getSubReg(); if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) { - unsigned Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg) - : MRI->getMaxLaneMaskForVReg(Reg); + LaneBitmask Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg) + : MRI->getMaxLaneMaskForVReg(Reg); // If this is the first time we see a subregister def, initialize // subranges by creating a copy of the main range. if (!LI.hasSubRanges() && !LI.empty()) { - unsigned ClassMask = MRI->getMaxLaneMaskForVReg(Reg); + LaneBitmask ClassMask = MRI->getMaxLaneMaskForVReg(Reg); LI.createSubRangeFrom(*Alloc, ClassMask, LI); } for (LiveInterval::SubRange &S : LI.subranges()) { // A Mask for subregs common to the existing subrange and current def. - unsigned Common = S.LaneMask & Mask; + LaneBitmask Common = S.LaneMask & Mask; if (Common == 0) continue; // A Mask for subregs covered by the subrange but not the current def. - unsigned LRest = S.LaneMask & ~Mask; + LaneBitmask LRest = S.LaneMask & ~Mask; LiveInterval::SubRange *CommonRange; if (LRest != 0) { // Split current subrange into Common and LRest ranges. @@ -138,7 +138,8 @@ void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { } -void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, unsigned Mask) { +void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, + LaneBitmask Mask) { // Visit all operands that read Reg. This may include partial defs. const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { @@ -157,7 +158,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, unsigned Mask) { continue; unsigned SubReg = MO.getSubReg(); if (SubReg != 0) { - unsigned SubRegMask = TRI.getSubRegIndexLaneMask(SubReg); + LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg); // Ignore uses not covering the current subrange. if ((SubRegMask & Mask) == 0) continue; diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h index 34d99534834b..ff38c68820f1 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h @@ -129,7 +129,7 @@ class LiveRangeCalc { /// /// All uses must be jointly dominated by existing liveness. PHI-defs are /// inserted as needed to preserve SSA form. - void extendToUses(LiveRange &LR, unsigned Reg, unsigned LaneMask); + void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask); /// Reset Map and Seen fields. 
void resetLiveOutMap(); diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp index 08bbe0c3f379..5ce364ae661e 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -226,7 +226,7 @@ bool LiveRangeEdit::useIsKill(const LiveInterval &LI, return true; const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); unsigned SubReg = MO.getSubReg(); - unsigned LaneMask = TRI.getSubRegIndexLaneMask(SubReg); + LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg); for (const LiveInterval::SubRange &S : LI.subranges()) { if ((S.LaneMask & LaneMask) != 0 && S.Query(Idx).isKill()) return true; @@ -349,8 +349,9 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, ToShrink.pop_back(); if (foldAsLoad(LI, Dead)) continue; + unsigned VReg = LI->reg; if (TheDelegate) - TheDelegate->LRE_WillShrinkVirtReg(LI->reg); + TheDelegate->LRE_WillShrinkVirtReg(VReg); if (!LIS.shrinkToUses(LI, &Dead)) continue; @@ -360,7 +361,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, // them results in incorrect code. bool BeingSpilled = false; for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) { - if (LI->reg == RegsBeingSpilled[i]) { + if (VReg == RegsBeingSpilled[i]) { BeingSpilled = true; break; } @@ -370,29 +371,21 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, // LI may have been separated, create new intervals. LI->RenumberValues(); - ConnectedVNInfoEqClasses ConEQ(LIS); - unsigned NumComp = ConEQ.Classify(LI); - if (NumComp <= 1) - continue; - ++NumFracRanges; - bool IsOriginal = VRM && VRM->getOriginal(LI->reg) == LI->reg; - DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); - SmallVector<LiveInterval*, 8> Dups(1, LI); - for (unsigned i = 1; i != NumComp; ++i) { - Dups.push_back(&createEmptyIntervalFrom(LI->reg)); + SmallVector<LiveInterval*, 8> SplitLIs; + LIS.splitSeparateComponents(*LI, SplitLIs); + if (!SplitLIs.empty()) + ++NumFracRanges; + + unsigned Original = VRM ? VRM->getOriginal(VReg) : 0; + for (const LiveInterval *SplitLI : SplitLIs) { // If LI is an original interval that hasn't been split yet, make the new // intervals their own originals instead of referring to LI. The original // interval must contain all the split products, and LI doesn't. 
- if (IsOriginal) - VRM->setIsSplitFromReg(Dups.back()->reg, 0); + if (Original != VReg && Original != 0) + VRM->setIsSplitFromReg(SplitLI->reg, Original); if (TheDelegate) - TheDelegate->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); + TheDelegate->LRE_DidCloneVirtReg(SplitLI->reg, VReg); } - ConEQ.Distribute(&Dups[0], MRI); - DEBUG({ - for (unsigned i = 0; i != NumComp; ++i) - dbgs() << '\t' << *Dups[i] << '\n'; - }); } } @@ -411,7 +404,7 @@ void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, const MachineLoopInfo &Loops, const MachineBlockFrequencyInfo &MBFI) { - VirtRegAuxInfo VRAI(MF, LIS, Loops, MBFI); + VirtRegAuxInfo VRAI(MF, LIS, VRM, Loops, MBFI); for (unsigned I = 0, Size = size(); I < Size; ++I) { LiveInterval &LI = LIS.getInterval(get(I)); if (MRI.recomputeRegClass(LI.reg)) diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp index 9ea031d38d29..7ee87c1e650f 100644 --- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -15,12 +15,11 @@ #include "RegisterCoalescer.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -49,7 +48,6 @@ void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const { bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); - MRI = &MF.getRegInfo(); LIS = &getAnalysis<LiveIntervals>(); VRM = &getAnalysis<VirtRegMap>(); @@ -78,7 +76,7 @@ bool foreachUnit(const TargetRegisterInfo *TRI, LiveInterval &VRegInterval, if (VRegInterval.hasSubRanges()) { for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { unsigned Unit = (*Units).first; - unsigned Mask = (*Units).second; + LaneBitmask Mask = (*Units).second; for (LiveInterval::SubRange &S : VRegInterval.subranges()) { if (S.LaneMask & Mask) { if (Func(Unit, S)) @@ -101,7 +99,6 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { << " to " << PrintReg(PhysReg, TRI) << ':'); assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); VRM->assignVirt2Phys(VirtReg.reg, PhysReg); - MRI->setPhysRegUsed(PhysReg); foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) { diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp index b355393e76f7..06b86d82daf1 100644 --- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -522,11 +522,15 @@ void LiveVariables::runOnInstr(MachineInstr *MI, continue; unsigned MOReg = MO.getReg(); if (MO.isUse()) { - MO.setIsKill(false); + if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) && + MRI->isReserved(MOReg))) + MO.setIsKill(false); if (MO.readsReg()) UseRegs.push_back(MOReg); } else /*MO.isDef()*/ { - MO.setIsDead(false); + if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) && + MRI->isReserved(MOReg))) + MO.setIsDead(false); DefRegs.push_back(MOReg); } } @@ -559,11 +563,10 @@ void LiveVariables::runOnInstr(MachineInstr *MI, void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) { // Mark live-in registers as live-in. 
SmallVector<unsigned, 4> Defs; - for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(), - EE = MBB->livein_end(); II != EE; ++II) { - assert(TargetRegisterInfo::isPhysicalRegister(*II) && + for (const auto &LI : MBB->liveins()) { + assert(TargetRegisterInfo::isPhysicalRegister(LI.PhysReg) && "Cannot have a live-in virtual register!"); - HandlePhysRegDef(*II, nullptr, Defs); + HandlePhysRegDef(LI.PhysReg, nullptr, Defs); } // Loop over all of the instructions, processing them. @@ -599,14 +602,12 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) { for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { MachineBasicBlock *SuccMBB = *SI; - if (SuccMBB->isLandingPad()) + if (SuccMBB->isEHPad()) continue; - for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(), - LE = SuccMBB->livein_end(); LI != LE; ++LI) { - unsigned LReg = *LI; - if (!TRI->isInAllocatableClass(LReg)) + for (const auto &LI : SuccMBB->liveins()) { + if (!TRI->isInAllocatableClass(LI.PhysReg)) // Ignore other live-ins, e.g. those that are live into landing pads. - LiveOuts.insert(LReg); + LiveOuts.insert(LI.PhysReg); } } @@ -640,7 +641,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // function. This guarantees that we will see the definition of a virtual // register before its uses due to dominance properties of SSA (except for PHI // nodes, which are treated as a special case). - MachineBasicBlock *Entry = MF->begin(); + MachineBasicBlock *Entry = &MF->front(); SmallPtrSet<MachineBasicBlock*,16> Visited; for (MachineBasicBlock *MBB : depth_first_ext(Entry, Visited)) { diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index 837842914b4c..eb60005764c0 100644 --- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -325,7 +325,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // Sort the frame references by local offset array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end()); - MachineBasicBlock *Entry = Fn.begin(); + MachineBasicBlock *Entry = &Fn.front(); unsigned BaseReg = 0; int64_t BaseOffset = 0; diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 482c33ae2235..28f9d4e298f9 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "MILexer.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include <cctype> @@ -54,15 +55,132 @@ public: } // end anonymous namespace +MIToken &MIToken::reset(TokenKind Kind, StringRef Range) { + this->Kind = Kind; + this->Range = Range; + return *this; +} + +MIToken &MIToken::setStringValue(StringRef StrVal) { + StringValue = StrVal; + return *this; +} + +MIToken &MIToken::setOwnedStringValue(std::string StrVal) { + StringValueStorage = std::move(StrVal); + StringValue = StringValueStorage; + return *this; +} + +MIToken &MIToken::setIntegerValue(APSInt IntVal) { + this->IntVal = std::move(IntVal); + return *this; +} + /// Skip the leading whitespace characters and return the updated cursor. 
static Cursor skipWhitespace(Cursor C) { - while (isspace(C.peek())) + while (isblank(C.peek())) + C.advance(); + return C; +} + +static bool isNewlineChar(char C) { return C == '\n' || C == '\r'; } + +/// Skip a line comment and return the updated cursor. +static Cursor skipComment(Cursor C) { + if (C.peek() != ';') + return C; + while (!isNewlineChar(C.peek()) && !C.isEOF()) C.advance(); return C; } +/// Return true if the given character satisfies the following regular +/// expression: [-a-zA-Z$._0-9] static bool isIdentifierChar(char C) { - return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.'; + return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' || + C == '$'; +} + +/// Unescapes the given string value. +/// +/// Expects the string value to be quoted. +static std::string unescapeQuotedString(StringRef Value) { + assert(Value.front() == '"' && Value.back() == '"'); + Cursor C = Cursor(Value.substr(1, Value.size() - 2)); + + std::string Str; + Str.reserve(C.remaining().size()); + while (!C.isEOF()) { + char Char = C.peek(); + if (Char == '\\') { + if (C.peek(1) == '\\') { + // Two '\' become one + Str += '\\'; + C.advance(2); + continue; + } + if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) { + Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2)); + C.advance(3); + continue; + } + } + Str += Char; + C.advance(); + } + return Str; +} + +/// Lex a string constant using the following regular expression: \"[^\"]*\" +static Cursor lexStringConstant( + Cursor C, + function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { + assert(C.peek() == '"'); + for (C.advance(); C.peek() != '"'; C.advance()) { + if (C.isEOF() || isNewlineChar(C.peek())) { + ErrorCallback( + C.location(), + "end of machine instruction reached before the closing '\"'"); + return None; + } + } + C.advance(); + return C; +} + +static Cursor lexName( + Cursor C, MIToken &Token, MIToken::TokenKind Type, unsigned PrefixLength, + function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { + auto Range = C; + C.advance(PrefixLength); + if (C.peek() == '"') { + if (Cursor R = lexStringConstant(C, ErrorCallback)) { + StringRef String = Range.upto(R); + Token.reset(Type, String) + .setOwnedStringValue( + unescapeQuotedString(String.drop_front(PrefixLength))); + return R; + } + Token.reset(MIToken::Error, Range.remaining()); + return Range; + } + while (isIdentifierChar(C.peek())) + C.advance(); + Token.reset(Type, Range.upto(C)) + .setStringValue(Range.upto(C).drop_front(PrefixLength)); + return C; +} + +static Cursor maybeLexIntegerType(Cursor C, MIToken &Token) { + if (C.peek() != 'i' || !isdigit(C.peek(1))) + return None; + auto Range = C; + C.advance(); // Skip 'i' + while (isdigit(C.peek())) + C.advance(); + Token.reset(MIToken::IntegerType, Range.upto(C)); + return C; } static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { @@ -70,32 +188,70 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("_", MIToken::underscore) .Case("implicit", MIToken::kw_implicit) .Case("implicit-def", MIToken::kw_implicit_define) + .Case("def", MIToken::kw_def) .Case("dead", MIToken::kw_dead) .Case("killed", MIToken::kw_killed) .Case("undef", MIToken::kw_undef) + .Case("internal", MIToken::kw_internal) + .Case("early-clobber", MIToken::kw_early_clobber) + .Case("debug-use", MIToken::kw_debug_use) + .Case("tied-def", MIToken::kw_tied_def) + .Case("frame-setup", MIToken::kw_frame_setup) + .Case("debug-location", MIToken::kw_debug_location) 
+ .Case(".cfi_same_value", MIToken::kw_cfi_same_value) + .Case(".cfi_offset", MIToken::kw_cfi_offset) + .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register) + .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) + .Case(".cfi_def_cfa", MIToken::kw_cfi_def_cfa) + .Case("blockaddress", MIToken::kw_blockaddress) + .Case("target-index", MIToken::kw_target_index) + .Case("half", MIToken::kw_half) + .Case("float", MIToken::kw_float) + .Case("double", MIToken::kw_double) + .Case("x86_fp80", MIToken::kw_x86_fp80) + .Case("fp128", MIToken::kw_fp128) + .Case("ppc_fp128", MIToken::kw_ppc_fp128) + .Case("target-flags", MIToken::kw_target_flags) + .Case("volatile", MIToken::kw_volatile) + .Case("non-temporal", MIToken::kw_non_temporal) + .Case("invariant", MIToken::kw_invariant) + .Case("align", MIToken::kw_align) + .Case("stack", MIToken::kw_stack) + .Case("got", MIToken::kw_got) + .Case("jump-table", MIToken::kw_jump_table) + .Case("constant-pool", MIToken::kw_constant_pool) + .Case("call-entry", MIToken::kw_call_entry) + .Case("liveout", MIToken::kw_liveout) + .Case("address-taken", MIToken::kw_address_taken) + .Case("landing-pad", MIToken::kw_landing_pad) + .Case("liveins", MIToken::kw_liveins) + .Case("successors", MIToken::kw_successors) .Default(MIToken::Identifier); } static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { - if (!isalpha(C.peek()) && C.peek() != '_') + if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.') return None; auto Range = C; while (isIdentifierChar(C.peek())) C.advance(); auto Identifier = Range.upto(C); - Token = MIToken(getIdentifierKind(Identifier), Identifier); + Token.reset(getIdentifierKind(Identifier), Identifier) + .setStringValue(Identifier); return C; } static Cursor maybeLexMachineBasicBlock( Cursor C, MIToken &Token, function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { - if (!C.remaining().startswith("%bb.")) + bool IsReference = C.remaining().startswith("%bb."); + if (!IsReference && !C.remaining().startswith("bb.")) return None; auto Range = C; - C.advance(4); // Skip '%bb.' + unsigned PrefixLength = IsReference ? 4 : 3; + C.advance(PrefixLength); // Skip '%bb.' or 'bb.' if (!isdigit(C.peek())) { - Token = MIToken(MIToken::Error, C.remaining()); + Token.reset(MIToken::Error, C.remaining()); ErrorCallback(C.location(), "expected a number after '%bb.'"); return C; } @@ -103,26 +259,103 @@ static Cursor maybeLexMachineBasicBlock( while (isdigit(C.peek())) C.advance(); StringRef Number = NumberRange.upto(C); - unsigned StringOffset = 4 + Number.size(); // Drop '%bb.<id>' + unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>' if (C.peek() == '.') { C.advance(); // Skip '.' ++StringOffset; while (isIdentifierChar(C.peek())) C.advance(); } - Token = MIToken(MIToken::MachineBasicBlock, Range.upto(C), APSInt(Number), - StringOffset); + Token.reset(IsReference ? 
MIToken::MachineBasicBlock + : MIToken::MachineBasicBlockLabel, + Range.upto(C)) + .setIntegerValue(APSInt(Number)) + .setStringValue(Range.upto(C).drop_front(StringOffset)); + return C; +} + +static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, + MIToken::TokenKind Kind) { + if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) + return None; + auto Range = C; + C.advance(Rule.size()); + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C))); + return C; +} + +static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, + MIToken::TokenKind Kind) { + if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) + return None; + auto Range = C; + C.advance(Rule.size()); + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + StringRef Number = NumberRange.upto(C); + unsigned StringOffset = Rule.size() + Number.size(); + if (C.peek() == '.') { + C.advance(); + ++StringOffset; + while (isIdentifierChar(C.peek())) + C.advance(); + } + Token.reset(Kind, Range.upto(C)) + .setIntegerValue(APSInt(Number)) + .setStringValue(Range.upto(C).drop_front(StringOffset)); return C; } +static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { + return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex); +} + +static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { + return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject); +} + +static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { + return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject); +} + +static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { + return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem); +} + +static Cursor maybeLexIRBlock( + Cursor C, MIToken &Token, + function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { + const StringRef Rule = "%ir-block."; + if (!C.remaining().startswith(Rule)) + return None; + if (isdigit(C.peek(Rule.size()))) + return maybeLexIndex(C, Token, Rule, MIToken::IRBlock); + return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback); +} + +static Cursor maybeLexIRValue( + Cursor C, MIToken &Token, + function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { + const StringRef Rule = "%ir."; + if (!C.remaining().startswith(Rule)) + return None; + if (isdigit(C.peek(Rule.size()))) + return maybeLexIndex(C, Token, Rule, MIToken::IRValue); + return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); +} + static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { auto Range = C; C.advance(); // Skip '%' auto NumberRange = C; while (isdigit(C.peek())) C.advance(); - Token = MIToken(MIToken::VirtualRegister, Range.upto(C), - APSInt(NumberRange.upto(C))); + Token.reset(MIToken::VirtualRegister, Range.upto(C)) + .setIntegerValue(APSInt(NumberRange.upto(C))); return C; } @@ -135,41 +368,112 @@ static Cursor maybeLexRegister(Cursor C, MIToken &Token) { C.advance(); // Skip '%' while (isIdentifierChar(C.peek())) C.advance(); - Token = MIToken(MIToken::NamedRegister, Range.upto(C), - /*StringOffset=*/1); // Drop the '%' + Token.reset(MIToken::NamedRegister, Range.upto(C)) + .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' return C; } -static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token) { +static Cursor maybeLexGlobalValue( + Cursor C, MIToken &Token, + 
function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { if (C.peek() != '@') return None; + if (!isdigit(C.peek(1))) + return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1, + ErrorCallback); auto Range = C; - C.advance(); // Skip the '@' - // TODO: add support for quoted names. - if (!isdigit(C.peek())) { - while (isIdentifierChar(C.peek())) - C.advance(); - Token = MIToken(MIToken::NamedGlobalValue, Range.upto(C), - /*StringOffset=*/1); // Drop the '@' - return C; - } + C.advance(1); // Skip the '@' auto NumberRange = C; while (isdigit(C.peek())) C.advance(); - Token = - MIToken(MIToken::GlobalValue, Range.upto(C), APSInt(NumberRange.upto(C))); + Token.reset(MIToken::GlobalValue, Range.upto(C)) + .setIntegerValue(APSInt(NumberRange.upto(C))); return C; } -static Cursor maybeLexIntegerLiteral(Cursor C, MIToken &Token) { +static Cursor maybeLexExternalSymbol( + Cursor C, MIToken &Token, + function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { + if (C.peek() != '$') + return None; + return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1, + ErrorCallback); +} + +static bool isValidHexFloatingPointPrefix(char C) { + return C == 'H' || C == 'K' || C == 'L' || C == 'M'; +} + +static Cursor maybeLexHexFloatingPointLiteral(Cursor C, MIToken &Token) { + if (C.peek() != '0' || C.peek(1) != 'x') + return None; + Cursor Range = C; + C.advance(2); // Skip '0x' + if (isValidHexFloatingPointPrefix(C.peek())) + C.advance(); + while (isxdigit(C.peek())) + C.advance(); + Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); + return C; +} + +static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { + C.advance(); + // Skip over [0-9]*([eE][-+]?[0-9]+)? + while (isdigit(C.peek())) + C.advance(); + if ((C.peek() == 'e' || C.peek() == 'E') && + (isdigit(C.peek(1)) || + ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) { + C.advance(2); + while (isdigit(C.peek())) + C.advance(); + } + Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); + return C; +} + +static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) return None; auto Range = C; C.advance(); while (isdigit(C.peek())) C.advance(); + if (C.peek() == '.') + return lexFloatingPointLiteral(Range, C, Token); StringRef StrVal = Range.upto(C); - Token = MIToken(MIToken::IntegerLiteral, StrVal, APSInt(StrVal)); + Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal)); + return C; +} + +static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { + return StringSwitch<MIToken::TokenKind>(Identifier) + .Case("!tbaa", MIToken::md_tbaa) + .Case("!alias.scope", MIToken::md_alias_scope) + .Case("!noalias", MIToken::md_noalias) + .Case("!range", MIToken::md_range) + .Default(MIToken::Error); +} + +static Cursor maybeLexExlaim( + Cursor C, MIToken &Token, + function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { + if (C.peek() != '!') + return None; + auto Range = C; + C.advance(1); + if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) { + Token.reset(MIToken::exclaim, Range.upto(C)); + return C; + } + while (isIdentifierChar(C.peek())) + C.advance(); + StringRef StrVal = Range.upto(C); + Token.reset(getMetadataKeywordKind(StrVal), StrVal); + if (Token.isError()) + ErrorCallback(Token.location(), + "use of unknown metadata keyword '" + StrVal + "'"); return C; } @@ -181,44 +485,119 @@ static MIToken::TokenKind 
symbolToken(char C) { return MIToken::equal; case ':': return MIToken::colon; + case '(': + return MIToken::lparen; + case ')': + return MIToken::rparen; + case '{': + return MIToken::lbrace; + case '}': + return MIToken::rbrace; + case '+': + return MIToken::plus; + case '-': + return MIToken::minus; default: return MIToken::Error; } } static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { - auto Kind = symbolToken(C.peek()); + MIToken::TokenKind Kind; + unsigned Length = 1; + if (C.peek() == ':' && C.peek(1) == ':') { + Kind = MIToken::coloncolon; + Length = 2; + } else + Kind = symbolToken(C.peek()); if (Kind == MIToken::Error) return None; auto Range = C; + C.advance(Length); + Token.reset(Kind, Range.upto(C)); + return C; +} + +static Cursor maybeLexNewline(Cursor C, MIToken &Token) { + if (!isNewlineChar(C.peek())) + return None; + auto Range = C; + C.advance(); + Token.reset(MIToken::Newline, Range.upto(C)); + return C; +} + +static Cursor maybeLexEscapedIRValue( + Cursor C, MIToken &Token, + function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { + if (C.peek() != '`') + return None; + auto Range = C; + C.advance(); + auto StrRange = C; + while (C.peek() != '`') { + if (C.isEOF() || isNewlineChar(C.peek())) { + ErrorCallback( + C.location(), + "end of machine instruction reached before the closing '`'"); + Token.reset(MIToken::Error, Range.remaining()); + return C; + } + C.advance(); + } + StringRef Value = StrRange.upto(C); C.advance(); - Token = MIToken(Kind, Range.upto(C)); + Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value); return C; } StringRef llvm::lexMIToken( StringRef Source, MIToken &Token, function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { - auto C = skipWhitespace(Cursor(Source)); + auto C = skipComment(skipWhitespace(Cursor(Source))); if (C.isEOF()) { - Token = MIToken(MIToken::Eof, C.remaining()); + Token.reset(MIToken::Eof, C.remaining()); return C.remaining(); } - if (Cursor R = maybeLexIdentifier(C, Token)) + if (Cursor R = maybeLexIntegerType(C, Token)) return R.remaining(); if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) return R.remaining(); + if (Cursor R = maybeLexIdentifier(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexJumpTableIndex(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexStackObject(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexFixedStackObject(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexConstantPoolItem(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) + return R.remaining(); if (Cursor R = maybeLexRegister(C, Token)) return R.remaining(); - if (Cursor R = maybeLexGlobalValue(C, Token)) + if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) return R.remaining(); - if (Cursor R = maybeLexIntegerLiteral(C, Token)) + if (Cursor R = maybeLexHexFloatingPointLiteral(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexNumericalLiteral(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback)) return R.remaining(); if (Cursor R = maybeLexSymbol(C, Token)) return R.remaining(); + if (Cursor R = maybeLexNewline(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) + return R.remaining(); - Token = 
MIToken(MIToken::Error, C.remaining()); + Token.reset(MIToken::Error, C.remaining()); ErrorCallback(C.location(), Twine("unexpected character '") + Twine(C.peek()) + "'"); return C.remaining(); diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h index 55460b56e7d6..ff54aa3554d8 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -30,50 +30,119 @@ struct MIToken { // Markers Eof, Error, + Newline, // Tokens with no info. comma, equal, underscore, colon, + coloncolon, + exclaim, + lparen, + rparen, + lbrace, + rbrace, + plus, + minus, // Keywords kw_implicit, kw_implicit_define, + kw_def, kw_dead, kw_killed, kw_undef, + kw_internal, + kw_early_clobber, + kw_debug_use, + kw_tied_def, + kw_frame_setup, + kw_debug_location, + kw_cfi_same_value, + kw_cfi_offset, + kw_cfi_def_cfa_register, + kw_cfi_def_cfa_offset, + kw_cfi_def_cfa, + kw_blockaddress, + kw_target_index, + kw_half, + kw_float, + kw_double, + kw_x86_fp80, + kw_fp128, + kw_ppc_fp128, + kw_target_flags, + kw_volatile, + kw_non_temporal, + kw_invariant, + kw_align, + kw_stack, + kw_got, + kw_jump_table, + kw_constant_pool, + kw_call_entry, + kw_liveout, + kw_address_taken, + kw_landing_pad, + kw_liveins, + kw_successors, + + // Named metadata keywords + md_tbaa, + md_alias_scope, + md_noalias, + md_range, // Identifier tokens Identifier, + IntegerType, NamedRegister, + MachineBasicBlockLabel, MachineBasicBlock, + StackObject, + FixedStackObject, NamedGlobalValue, GlobalValue, + ExternalSymbol, // Other tokens IntegerLiteral, - VirtualRegister + FloatingPointLiteral, + VirtualRegister, + ConstantPoolItem, + JumpTableIndex, + NamedIRBlock, + IRBlock, + NamedIRValue, + IRValue, + QuotedIRValue // `<constant value>` }; private: TokenKind Kind; - unsigned StringOffset; StringRef Range; + StringRef StringValue; + std::string StringValueStorage; APSInt IntVal; public: - MIToken(TokenKind Kind, StringRef Range, unsigned StringOffset = 0) - : Kind(Kind), StringOffset(StringOffset), Range(Range) {} + MIToken() : Kind(Error) {} - MIToken(TokenKind Kind, StringRef Range, const APSInt &IntVal, - unsigned StringOffset = 0) - : Kind(Kind), StringOffset(StringOffset), Range(Range), IntVal(IntVal) {} + MIToken &reset(TokenKind Kind, StringRef Range); + + MIToken &setStringValue(StringRef StrVal); + MIToken &setOwnedStringValue(std::string StrVal); + MIToken &setIntegerValue(APSInt IntVal); TokenKind kind() const { return Kind; } bool isError() const { return Kind == Error; } + bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; } + + bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; } + bool isRegister() const { return Kind == NamedRegister || Kind == underscore || Kind == VirtualRegister; @@ -81,7 +150,14 @@ public: bool isRegisterFlag() const { return Kind == kw_implicit || Kind == kw_implicit_define || - Kind == kw_dead || Kind == kw_killed || Kind == kw_undef; + Kind == kw_def || Kind == kw_dead || Kind == kw_killed || + Kind == kw_undef || Kind == kw_internal || + Kind == kw_early_clobber || Kind == kw_debug_use; + } + + bool isMemoryOperandFlag() const { + return Kind == kw_volatile || Kind == kw_non_temporal || + Kind == kw_invariant; } bool is(TokenKind K) const { return Kind == K; } @@ -90,13 +166,19 @@ public: StringRef::iterator location() const { return Range.begin(); } - StringRef stringValue() const { return Range.drop_front(StringOffset); } + StringRef range() const { return Range; } + + /// 
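The reworked lexer is still driven one token at a time: lexMIToken takes the remaining source text, fills in the token, and returns whatever follows it. A minimal caller sketch (not part of the patch; it assumes the usual LLVM headers and mirrors the MIParser::lex() helper that appears later in this diff):

    MIToken Token;
    StringRef Rest = Source;   // Source holds the machine instruction text
    do {
      Rest = lexMIToken(Rest, Token,
                        [](StringRef::iterator Loc, const Twine &Msg) {
                          // Report the lexing error at Loc.
                        });
      // ... inspect Token.kind(), Token.stringValue(), Token.integerValue() ...
    } while (!Token.isErrorOrEOF());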
Return the token's string value. + StringRef stringValue() const { return StringValue; } const APSInt &integerValue() const { return IntVal; } bool hasIntegerValue() const { return Kind == IntegerLiteral || Kind == MachineBasicBlock || - Kind == GlobalValue || Kind == VirtualRegister; + Kind == MachineBasicBlockLabel || Kind == StackObject || + Kind == FixedStackObject || Kind == GlobalValue || + Kind == VirtualRegister || Kind == ConstantPoolItem || + Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue; } }; diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp index c00011288a60..f2f6584fb6c8 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -14,12 +14,20 @@ #include "MIParser.h" #include "MILexer.h" #include "llvm/ADT/StringMap.h" +#include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -30,15 +38,20 @@ using namespace llvm; namespace { /// A wrapper struct around the 'MachineOperand' struct that includes a source -/// range. -struct MachineOperandWithLocation { +/// range and other attributes. +struct ParsedMachineOperand { MachineOperand Operand; StringRef::iterator Begin; StringRef::iterator End; - - MachineOperandWithLocation(const MachineOperand &Operand, - StringRef::iterator Begin, StringRef::iterator End) - : Operand(Operand), Begin(Begin), End(End) {} + Optional<unsigned> TiedDefIdx; + + ParsedMachineOperand(const MachineOperand &Operand, StringRef::iterator Begin, + StringRef::iterator End, Optional<unsigned> &TiedDefIdx) + : Operand(Operand), Begin(Begin), End(End), TiedDefIdx(TiedDefIdx) { + if (TiedDefIdx) + assert(Operand.isReg() && Operand.isUse() && + "Only used register operands can be tied"); + } }; class MIParser { @@ -58,6 +71,16 @@ class MIParser { StringMap<const uint32_t *> Names2RegMasks; /// Maps from subregister names to subregister indices. StringMap<unsigned> Names2SubRegIndices; + /// Maps from slot numbers to function's unnamed basic blocks. + DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks; + /// Maps from slot numbers to function's unnamed values. + DenseMap<unsigned, const Value *> Slots2Values; + /// Maps from target index names to target indices. + StringMap<int> Names2TargetIndices; + /// Maps from direct target flag names to the direct target flag values. + StringMap<unsigned> Names2DirectTargetFlags; + /// Maps from direct target flag names to the bitmask target flag values. + StringMap<unsigned> Names2BitmaskTargetFlags; public: MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, @@ -76,19 +99,66 @@ public: /// This function always return true. 
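With the constructors replaced by reset() and the chained set*Value() helpers, a token object is now reused rather than rebuilt. A small illustration of the new API (the values are made up for the example):

    MIToken Tok;                               // default-constructed as an error token
    Tok.reset(MIToken::IntegerLiteral, "42").setIntegerValue(APSInt("42"));
    assert(Tok.hasIntegerValue() && Tok.integerValue().getExtValue() == 42);
    Tok.reset(MIToken::NamedIRValue, "%ir.foo").setStringValue("foo");

setOwnedStringValue() and the StringValueStorage member let the lexer hand the token a string it had to build itself, instead of a slice of the source buffer.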
bool error(StringRef::iterator Loc, const Twine &Msg); + bool + parseBasicBlockDefinitions(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots); + bool parseBasicBlocks(); bool parse(MachineInstr *&MI); - bool parseMBB(MachineBasicBlock *&MBB); - bool parseNamedRegister(unsigned &Reg); + bool parseStandaloneMBB(MachineBasicBlock *&MBB); + bool parseStandaloneNamedRegister(unsigned &Reg); + bool parseStandaloneVirtualRegister(unsigned &Reg); + bool parseStandaloneStackObject(int &FI); + bool parseStandaloneMDNode(MDNode *&Node); + + bool + parseBasicBlockDefinition(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots); + bool parseBasicBlock(MachineBasicBlock &MBB); + bool parseBasicBlockLiveins(MachineBasicBlock &MBB); + bool parseBasicBlockSuccessors(MachineBasicBlock &MBB); bool parseRegister(unsigned &Reg); bool parseRegisterFlag(unsigned &Flags); bool parseSubRegisterIndex(unsigned &SubReg); - bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false); + bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx); + bool parseRegisterOperand(MachineOperand &Dest, + Optional<unsigned> &TiedDefIdx, bool IsDef = false); bool parseImmediateOperand(MachineOperand &Dest); + bool parseIRConstant(StringRef::iterator Loc, StringRef Source, + const Constant *&C); + bool parseIRConstant(StringRef::iterator Loc, const Constant *&C); + bool parseTypedImmediateOperand(MachineOperand &Dest); + bool parseFPImmediateOperand(MachineOperand &Dest); bool parseMBBReference(MachineBasicBlock *&MBB); bool parseMBBOperand(MachineOperand &Dest); + bool parseStackFrameIndex(int &FI); + bool parseStackObjectOperand(MachineOperand &Dest); + bool parseFixedStackFrameIndex(int &FI); + bool parseFixedStackObjectOperand(MachineOperand &Dest); + bool parseGlobalValue(GlobalValue *&GV); bool parseGlobalAddressOperand(MachineOperand &Dest); - bool parseMachineOperand(MachineOperand &Dest); + bool parseConstantPoolIndexOperand(MachineOperand &Dest); + bool parseJumpTableIndexOperand(MachineOperand &Dest); + bool parseExternalSymbolOperand(MachineOperand &Dest); + bool parseMDNode(MDNode *&Node); + bool parseMetadataOperand(MachineOperand &Dest); + bool parseCFIOffset(int &Offset); + bool parseCFIRegister(unsigned &Reg); + bool parseCFIOperand(MachineOperand &Dest); + bool parseIRBlock(BasicBlock *&BB, const Function &F); + bool parseBlockAddressOperand(MachineOperand &Dest); + bool parseTargetIndexOperand(MachineOperand &Dest); + bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest); + bool parseMachineOperand(MachineOperand &Dest, + Optional<unsigned> &TiedDefIdx); + bool parseMachineOperandAndTargetFlags(MachineOperand &Dest, + Optional<unsigned> &TiedDefIdx); + bool parseOffset(int64_t &Offset); + bool parseAlignment(unsigned &Alignment); + bool parseOperandsOffset(MachineOperand &Op); + bool parseIRValue(const Value *&V); + bool parseMemoryOperandFlag(unsigned &Flags); + bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV); + bool parseMachinePointerInfo(MachinePointerInfo &Dest); + bool parseMachineMemoryOperand(MachineMemOperand *&Dest); private: /// Convert the integer literal in the current token into an unsigned integer. @@ -96,15 +166,31 @@ private: /// Return true if an error occurred. bool getUnsigned(unsigned &Result); + /// Convert the integer literal in the current token into an uint64. + /// + /// Return true if an error occurred. + bool getUint64(uint64_t &Result); + + /// If the current token is of the given kind, consume it and return false. 
+ /// Otherwise report an error and return true. + bool expectAndConsume(MIToken::TokenKind TokenKind); + + /// If the current token is of the given kind, consume it and return true. + /// Otherwise return false. + bool consumeIfPresent(MIToken::TokenKind TokenKind); + void initNames2InstrOpCodes(); /// Try to convert an instruction name to an opcode. Return true if the /// instruction name is invalid. bool parseInstrName(StringRef InstrName, unsigned &OpCode); - bool parseInstruction(unsigned &OpCode); + bool parseInstruction(unsigned &OpCode, unsigned &Flags); + + bool assignRegisterTies(MachineInstr &MI, + ArrayRef<ParsedMachineOperand> Operands); - bool verifyImplicitOperands(ArrayRef<MachineOperandWithLocation> Operands, + bool verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands, const MCInstrDesc &MCID); void initNames2Regs(); @@ -126,6 +212,34 @@ private: /// /// Return 0 if the name isn't a subregister index class. unsigned getSubRegIndex(StringRef Name); + + const BasicBlock *getIRBlock(unsigned Slot); + const BasicBlock *getIRBlock(unsigned Slot, const Function &F); + + const Value *getIRValue(unsigned Slot); + + void initNames2TargetIndices(); + + /// Try to convert a name of target index to the corresponding target index. + /// + /// Return true if the name isn't a name of a target index. + bool getTargetIndex(StringRef Name, int &Index); + + void initNames2DirectTargetFlags(); + + /// Try to convert a name of a direct target flag to the corresponding + /// target flag. + /// + /// Return true if the name isn't a name of a direct flag. + bool getDirectTargetFlag(StringRef Name, unsigned &Flag); + + void initNames2BitmaskTargetFlags(); + + /// Try to convert a name of a bitmask target flag to the corresponding + /// target flag. + /// + /// Return true if the name isn't a name of a bitmask target flag. + bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag); }; } // end anonymous namespace @@ -134,7 +248,7 @@ MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, StringRef Source, const PerFunctionMIParsingState &PFS, const SlotMapping &IRSlots) : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source), - Token(MIToken::Error, StringRef()), PFS(PFS), IRSlots(IRSlots) {} + PFS(PFS), IRSlots(IRSlots) {} void MIParser::lex() { CurrentSource = lexMIToken( @@ -146,49 +260,378 @@ bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); } bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size())); - Error = SMDiagnostic( - SM, SMLoc(), - SM.getMemoryBuffer(SM.getMainFileID())->getBufferIdentifier(), 1, - Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), Source, None, None); + const MemoryBuffer &Buffer = *SM.getMemoryBuffer(SM.getMainFileID()); + if (Loc >= Buffer.getBufferStart() && Loc <= Buffer.getBufferEnd()) { + // Create an ordinary diagnostic when the source manager's buffer is the + // source string. + Error = SM.GetMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg); + return true; + } + // Create a diagnostic for a YAML string literal. 
+ Error = SMDiagnostic(SM, SMLoc(), Buffer.getBufferIdentifier(), 1, + Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), + Source, None, None); return true; } -bool MIParser::parse(MachineInstr *&MI) { +static const char *toString(MIToken::TokenKind TokenKind) { + switch (TokenKind) { + case MIToken::comma: + return "','"; + case MIToken::equal: + return "'='"; + case MIToken::colon: + return "':'"; + case MIToken::lparen: + return "'('"; + case MIToken::rparen: + return "')'"; + default: + return "<unknown token>"; + } +} + +bool MIParser::expectAndConsume(MIToken::TokenKind TokenKind) { + if (Token.isNot(TokenKind)) + return error(Twine("expected ") + toString(TokenKind)); + lex(); + return false; +} + +bool MIParser::consumeIfPresent(MIToken::TokenKind TokenKind) { + if (Token.isNot(TokenKind)) + return false; + lex(); + return true; +} + +bool MIParser::parseBasicBlockDefinition( + DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) { + assert(Token.is(MIToken::MachineBasicBlockLabel)); + unsigned ID = 0; + if (getUnsigned(ID)) + return true; + auto Loc = Token.location(); + auto Name = Token.stringValue(); + lex(); + bool HasAddressTaken = false; + bool IsLandingPad = false; + unsigned Alignment = 0; + BasicBlock *BB = nullptr; + if (consumeIfPresent(MIToken::lparen)) { + do { + // TODO: Report an error when multiple same attributes are specified. + switch (Token.kind()) { + case MIToken::kw_address_taken: + HasAddressTaken = true; + lex(); + break; + case MIToken::kw_landing_pad: + IsLandingPad = true; + lex(); + break; + case MIToken::kw_align: + if (parseAlignment(Alignment)) + return true; + break; + case MIToken::IRBlock: + // TODO: Report an error when both name and ir block are specified. + if (parseIRBlock(BB, *MF.getFunction())) + return true; + lex(); + break; + default: + break; + } + } while (consumeIfPresent(MIToken::comma)); + if (expectAndConsume(MIToken::rparen)) + return true; + } + if (expectAndConsume(MIToken::colon)) + return true; + + if (!Name.empty()) { + BB = dyn_cast_or_null<BasicBlock>( + MF.getFunction()->getValueSymbolTable().lookup(Name)); + if (!BB) + return error(Loc, Twine("basic block '") + Name + + "' is not defined in the function '" + + MF.getName() + "'"); + } + auto *MBB = MF.CreateMachineBasicBlock(BB); + MF.insert(MF.end(), MBB); + bool WasInserted = MBBSlots.insert(std::make_pair(ID, MBB)).second; + if (!WasInserted) + return error(Loc, Twine("redefinition of machine basic block with id #") + + Twine(ID)); + if (Alignment) + MBB->setAlignment(Alignment); + if (HasAddressTaken) + MBB->setHasAddressTaken(); + MBB->setIsEHPad(IsLandingPad); + return false; +} + +bool MIParser::parseBasicBlockDefinitions( + DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) { + lex(); + // Skip until the first machine basic block. + while (Token.is(MIToken::Newline)) + lex(); + if (Token.isErrorOrEOF()) + return Token.isError(); + if (Token.isNot(MIToken::MachineBasicBlockLabel)) + return error("expected a basic block definition before instructions"); + unsigned BraceDepth = 0; + do { + if (parseBasicBlockDefinition(MBBSlots)) + return true; + bool IsAfterNewline = false; + // Skip until the next machine basic block. 
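parseBasicBlockDefinition and parseBasicBlockDefinitions implement the first of two passes over the block list: every block label is turned into an empty MachineBasicBlock up front so later references can be resolved, and only the optional attribute list in parentheses is interpreted at this point. In the MIR text this accepts, a definition looks roughly like (block and function names are illustrative):

    bb.1.if.then (address-taken, landing-pad, align 8):

where address-taken maps to setHasAddressTaken(), landing-pad to setIsEHPad(), align to setAlignment(), and an IR block reference in the list binds the machine block to a specific IR basic block.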
+ while (true) { + if ((Token.is(MIToken::MachineBasicBlockLabel) && IsAfterNewline) || + Token.isErrorOrEOF()) + break; + else if (Token.is(MIToken::MachineBasicBlockLabel)) + return error("basic block definition should be located at the start of " + "the line"); + else if (consumeIfPresent(MIToken::Newline)) { + IsAfterNewline = true; + continue; + } + IsAfterNewline = false; + if (Token.is(MIToken::lbrace)) + ++BraceDepth; + if (Token.is(MIToken::rbrace)) { + if (!BraceDepth) + return error("extraneous closing brace ('}')"); + --BraceDepth; + } + lex(); + } + // Verify that we closed all of the '{' at the end of a file or a block. + if (!Token.isError() && BraceDepth) + return error("expected '}'"); // FIXME: Report a note that shows '{'. + } while (!Token.isErrorOrEOF()); + return Token.isError(); +} + +bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) { + assert(Token.is(MIToken::kw_liveins)); + lex(); + if (expectAndConsume(MIToken::colon)) + return true; + if (Token.isNewlineOrEOF()) // Allow an empty list of liveins. + return false; + do { + if (Token.isNot(MIToken::NamedRegister)) + return error("expected a named register"); + unsigned Reg = 0; + if (parseRegister(Reg)) + return true; + MBB.addLiveIn(Reg); + lex(); + } while (consumeIfPresent(MIToken::comma)); + return false; +} + +bool MIParser::parseBasicBlockSuccessors(MachineBasicBlock &MBB) { + assert(Token.is(MIToken::kw_successors)); lex(); + if (expectAndConsume(MIToken::colon)) + return true; + if (Token.isNewlineOrEOF()) // Allow an empty list of successors. + return false; + do { + if (Token.isNot(MIToken::MachineBasicBlock)) + return error("expected a machine basic block reference"); + MachineBasicBlock *SuccMBB = nullptr; + if (parseMBBReference(SuccMBB)) + return true; + lex(); + unsigned Weight = 0; + if (consumeIfPresent(MIToken::lparen)) { + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected an integer literal after '('"); + if (getUnsigned(Weight)) + return true; + lex(); + if (expectAndConsume(MIToken::rparen)) + return true; + } + MBB.addSuccessor(SuccMBB, BranchProbability::getRaw(Weight)); + } while (consumeIfPresent(MIToken::comma)); + MBB.normalizeSuccProbs(); + return false; +} +bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) { + // Skip the definition. + assert(Token.is(MIToken::MachineBasicBlockLabel)); + lex(); + if (consumeIfPresent(MIToken::lparen)) { + while (Token.isNot(MIToken::rparen) && !Token.isErrorOrEOF()) + lex(); + consumeIfPresent(MIToken::rparen); + } + consumeIfPresent(MIToken::colon); + + // Parse the liveins and successors. + // N.B: Multiple lists of successors and liveins are allowed and they're + // merged into one. + // Example: + // liveins: %edi + // liveins: %esi + // + // is equivalent to + // liveins: %edi, %esi + while (true) { + if (Token.is(MIToken::kw_successors)) { + if (parseBasicBlockSuccessors(MBB)) + return true; + } else if (Token.is(MIToken::kw_liveins)) { + if (parseBasicBlockLiveins(MBB)) + return true; + } else if (consumeIfPresent(MIToken::Newline)) { + continue; + } else + break; + if (!Token.isNewlineOrEOF()) + return error("expected line break at the end of a list"); + lex(); + } + + // Parse the instructions. 
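parseBasicBlockLiveins and parseBasicBlockSuccessors accept lines such as (register and block numbers are illustrative):

    successors: %bb.1(16), %bb.2(16)
    liveins: %edi, %esi

Each successor may carry a raw weight in parentheses, which is passed to MBB.addSuccessor() via BranchProbability::getRaw() and later renormalized with normalizeSuccProbs(); repeated liveins:/successors: lines are simply merged, as the comment in parseBasicBlock notes.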
+ bool IsInBundle = false; + MachineInstr *PrevMI = nullptr; + while (true) { + if (Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof)) + return false; + else if (consumeIfPresent(MIToken::Newline)) + continue; + if (consumeIfPresent(MIToken::rbrace)) { + // The first parsing pass should verify that all closing '}' have an + // opening '{'. + assert(IsInBundle); + IsInBundle = false; + continue; + } + MachineInstr *MI = nullptr; + if (parse(MI)) + return true; + MBB.insert(MBB.end(), MI); + if (IsInBundle) { + PrevMI->setFlag(MachineInstr::BundledSucc); + MI->setFlag(MachineInstr::BundledPred); + } + PrevMI = MI; + if (Token.is(MIToken::lbrace)) { + if (IsInBundle) + return error("nested instruction bundles are not allowed"); + lex(); + // This instruction is the start of the bundle. + MI->setFlag(MachineInstr::BundledSucc); + IsInBundle = true; + if (!Token.is(MIToken::Newline)) + // The next instruction can be on the same line. + continue; + } + assert(Token.isNewlineOrEOF() && "MI is not fully parsed"); + lex(); + } + return false; +} + +bool MIParser::parseBasicBlocks() { + lex(); + // Skip until the first machine basic block. + while (Token.is(MIToken::Newline)) + lex(); + if (Token.isErrorOrEOF()) + return Token.isError(); + // The first parsing pass should have verified that this token is a MBB label + // in the 'parseBasicBlockDefinitions' method. + assert(Token.is(MIToken::MachineBasicBlockLabel)); + do { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(MBB)) + return true; + if (parseBasicBlock(*MBB)) + return true; + // The method 'parseBasicBlock' should parse the whole block until the next + // block or the end of file. + assert(Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof)); + } while (Token.isNot(MIToken::Eof)); + return false; +} + +bool MIParser::parse(MachineInstr *&MI) { // Parse any register operands before '=' - // TODO: Allow parsing of multiple operands before '=' MachineOperand MO = MachineOperand::CreateImm(0); - SmallVector<MachineOperandWithLocation, 8> Operands; - if (Token.isRegister() || Token.isRegisterFlag()) { + SmallVector<ParsedMachineOperand, 8> Operands; + while (Token.isRegister() || Token.isRegisterFlag()) { auto Loc = Token.location(); - if (parseRegisterOperand(MO, /*IsDef=*/true)) + Optional<unsigned> TiedDefIdx; + if (parseRegisterOperand(MO, TiedDefIdx, /*IsDef=*/true)) return true; - Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location())); - if (Token.isNot(MIToken::equal)) - return error("expected '='"); + Operands.push_back( + ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx)); + if (Token.isNot(MIToken::comma)) + break; lex(); } - - unsigned OpCode; - if (Token.isError() || parseInstruction(OpCode)) + if (!Operands.empty() && expectAndConsume(MIToken::equal)) return true; - // TODO: Parse the instruction flags and memory operands. + unsigned OpCode, Flags = 0; + if (Token.isError() || parseInstruction(OpCode, Flags)) + return true; // Parse the remaining machine operands. 
- while (Token.isNot(MIToken::Eof)) { + while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) && + Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) { auto Loc = Token.location(); - if (parseMachineOperand(MO)) + Optional<unsigned> TiedDefIdx; + if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx)) return true; - Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location())); - if (Token.is(MIToken::Eof)) + Operands.push_back( + ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx)); + if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) || + Token.is(MIToken::lbrace)) break; if (Token.isNot(MIToken::comma)) return error("expected ',' before the next machine operand"); lex(); } + DebugLoc DebugLocation; + if (Token.is(MIToken::kw_debug_location)) { + lex(); + if (Token.isNot(MIToken::exclaim)) + return error("expected a metadata node after 'debug-location'"); + MDNode *Node = nullptr; + if (parseMDNode(Node)) + return true; + DebugLocation = DebugLoc(Node); + } + + // Parse the machine memory operands. + SmallVector<MachineMemOperand *, 2> MemOperands; + if (Token.is(MIToken::coloncolon)) { + lex(); + while (!Token.isNewlineOrEOF()) { + MachineMemOperand *MemOp = nullptr; + if (parseMachineMemoryOperand(MemOp)) + return true; + MemOperands.push_back(MemOp); + if (Token.isNewlineOrEOF()) + break; + if (Token.isNot(MIToken::comma)) + return error("expected ',' before the next machine memory operand"); + lex(); + } + } + const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode); if (!MCID.isVariadic()) { // FIXME: Move the implicit operand verification to the machine verifier. @@ -197,13 +640,22 @@ bool MIParser::parse(MachineInstr *&MI) { } // TODO: Check for extraneous machine operands. 
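Putting the pieces of MIParser::parse() together, an instruction line now has the shape: optional comma-separated register defs before '=', optional flags such as frame-setup, the opcode, the remaining operands, an optional 'debug-location !N' metadata reference, and an optional '::'-introduced list of machine memory operands. A schematic example (the opcode and operand names are hypothetical):

    %0, %1 = SOME_OP killed %2, 42, debug-location !9 :: (load 4 from %ir.ptr)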
- MI = MF.CreateMachineInstr(MCID, DebugLoc(), /*NoImplicit=*/true); + MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true); + MI->setFlags(Flags); for (const auto &Operand : Operands) MI->addOperand(MF, Operand.Operand); + if (assignRegisterTies(*MI, Operands)) + return true; + if (MemOperands.empty()) + return false; + MachineInstr::mmo_iterator MemRefs = + MF.allocateMemRefsArray(MemOperands.size()); + std::copy(MemOperands.begin(), MemOperands.end(), MemRefs); + MI->setMemRefs(MemRefs, MemRefs + MemOperands.size()); return false; } -bool MIParser::parseMBB(MachineBasicBlock *&MBB) { +bool MIParser::parseStandaloneMBB(MachineBasicBlock *&MBB) { lex(); if (Token.isNot(MIToken::MachineBasicBlock)) return error("expected a machine basic block reference"); @@ -216,18 +668,52 @@ bool MIParser::parseMBB(MachineBasicBlock *&MBB) { return false; } -bool MIParser::parseNamedRegister(unsigned &Reg) { +bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) { lex(); if (Token.isNot(MIToken::NamedRegister)) return error("expected a named register"); if (parseRegister(Reg)) - return 0; + return true; + lex(); + if (Token.isNot(MIToken::Eof)) + return error("expected end of string after the register reference"); + return false; +} + +bool MIParser::parseStandaloneVirtualRegister(unsigned &Reg) { + lex(); + if (Token.isNot(MIToken::VirtualRegister)) + return error("expected a virtual register"); + if (parseRegister(Reg)) + return true; lex(); if (Token.isNot(MIToken::Eof)) return error("expected end of string after the register reference"); return false; } +bool MIParser::parseStandaloneStackObject(int &FI) { + lex(); + if (Token.isNot(MIToken::StackObject)) + return error("expected a stack object"); + if (parseStackFrameIndex(FI)) + return true; + if (Token.isNot(MIToken::Eof)) + return error("expected end of string after the stack object reference"); + return false; +} + +bool MIParser::parseStandaloneMDNode(MDNode *&Node) { + lex(); + if (Token.isNot(MIToken::exclaim)) + return error("expected a metadata node"); + if (parseMDNode(Node)) + return true; + if (Token.isNot(MIToken::Eof)) + return error("expected end of string after the metadata node"); + return false; +} + static const char *printImplicitRegisterFlag(const MachineOperand &MO) { assert(MO.isImplicit()); return MO.isDef() ? "implicit-def" : "implicit"; @@ -239,8 +725,18 @@ static std::string getRegisterName(const TargetRegisterInfo *TRI, return StringRef(TRI->getName(Reg)).lower(); } -bool MIParser::verifyImplicitOperands( - ArrayRef<MachineOperandWithLocation> Operands, const MCInstrDesc &MCID) { +/// Return true if the parsed machine operands contain a given machine operand. +static bool isImplicitOperandIn(const MachineOperand &ImplicitOperand, + ArrayRef<ParsedMachineOperand> Operands) { + for (const auto &I : Operands) { + if (ImplicitOperand.isIdenticalTo(I.Operand)) + return true; + } + return false; +} + +bool MIParser::verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands, + const MCInstrDesc &MCID) { if (MCID.isCall()) // We can't verify call instructions as they can contain arbitrary implicit // register and register mask operands. @@ -249,48 +745,32 @@ bool MIParser::verifyImplicitOperands( // Gather all the expected implicit operands. 
SmallVector<MachineOperand, 4> ImplicitOperands; if (MCID.ImplicitDefs) - for (const uint16_t *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs) + for (const MCPhysReg *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs) ImplicitOperands.push_back( MachineOperand::CreateReg(*ImpDefs, true, true)); if (MCID.ImplicitUses) - for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses) + for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses) ImplicitOperands.push_back( MachineOperand::CreateReg(*ImpUses, false, true)); const auto *TRI = MF.getSubtarget().getRegisterInfo(); assert(TRI && "Expected target register info"); - size_t I = ImplicitOperands.size(), J = Operands.size(); - while (I) { - --I; - if (J) { - --J; - const auto &ImplicitOperand = ImplicitOperands[I]; - const auto &Operand = Operands[J].Operand; - if (ImplicitOperand.isIdenticalTo(Operand)) - continue; - if (Operand.isReg() && Operand.isImplicit()) { - return error(Operands[J].Begin, - Twine("expected an implicit register operand '") + - printImplicitRegisterFlag(ImplicitOperand) + " %" + - getRegisterName(TRI, ImplicitOperand.getReg()) + "'"); - } - } - // TODO: Fix source location when Operands[J].end is right before '=', i.e: - // insead of reporting an error at this location: - // %eax = MOV32r0 - // ^ - // report the error at the following location: - // %eax = MOV32r0 - // ^ - return error(J < Operands.size() ? Operands[J].End : Token.location(), + for (const auto &I : ImplicitOperands) { + if (isImplicitOperandIn(I, Operands)) + continue; + return error(Operands.empty() ? Token.location() : Operands.back().End, Twine("missing implicit register operand '") + - printImplicitRegisterFlag(ImplicitOperands[I]) + " %" + - getRegisterName(TRI, ImplicitOperands[I].getReg()) + "'"); + printImplicitRegisterFlag(I) + " %" + + getRegisterName(TRI, I.getReg()) + "'"); } return false; } -bool MIParser::parseInstruction(unsigned &OpCode) { +bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { + if (Token.is(MIToken::kw_frame_setup)) { + Flags |= MachineInstr::FrameSetup; + lex(); + } if (Token.isNot(MIToken::Identifier)) return error("expected a machine instruction"); StringRef InstrName = Token.stringValue(); @@ -330,6 +810,7 @@ bool MIParser::parseRegister(unsigned &Reg) { } bool MIParser::parseRegisterFlag(unsigned &Flags) { + const unsigned OldFlags = Flags; switch (Token.kind()) { case MIToken::kw_implicit: Flags |= RegState::Implicit; @@ -337,6 +818,9 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) { case MIToken::kw_implicit_define: Flags |= RegState::ImplicitDefine; break; + case MIToken::kw_def: + Flags |= RegState::Define; + break; case MIToken::kw_dead: Flags |= RegState::Dead; break; @@ -346,11 +830,22 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) { case MIToken::kw_undef: Flags |= RegState::Undef; break; - // TODO: report an error when we specify the same flag more than once. - // TODO: parse the other register flags. + case MIToken::kw_internal: + Flags |= RegState::InternalRead; + break; + case MIToken::kw_early_clobber: + Flags |= RegState::EarlyClobber; + break; + case MIToken::kw_debug_use: + Flags |= RegState::Debug; + break; default: llvm_unreachable("The current token should be a register flag"); } + if (OldFlags == Flags) + // We know that the same flag is specified more than once when the flags + // weren't modified. 
+ return error("duplicate '" + Token.stringValue() + "' register flag"); lex(); return false; } @@ -368,7 +863,59 @@ bool MIParser::parseSubRegisterIndex(unsigned &SubReg) { return false; } -bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) { +bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) { + if (!consumeIfPresent(MIToken::kw_tied_def)) + return error("expected 'tied-def' after '('"); + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected an integer literal after 'tied-def'"); + if (getUnsigned(TiedDefIdx)) + return true; + lex(); + if (expectAndConsume(MIToken::rparen)) + return true; + return false; +} + +bool MIParser::assignRegisterTies(MachineInstr &MI, + ArrayRef<ParsedMachineOperand> Operands) { + SmallVector<std::pair<unsigned, unsigned>, 4> TiedRegisterPairs; + for (unsigned I = 0, E = Operands.size(); I != E; ++I) { + if (!Operands[I].TiedDefIdx) + continue; + // The parser ensures that this operand is a register use, so we just have + // to check the tied-def operand. + unsigned DefIdx = Operands[I].TiedDefIdx.getValue(); + if (DefIdx >= E) + return error(Operands[I].Begin, + Twine("use of invalid tied-def operand index '" + + Twine(DefIdx) + "'; instruction has only ") + + Twine(E) + " operands"); + const auto &DefOperand = Operands[DefIdx].Operand; + if (!DefOperand.isReg() || !DefOperand.isDef()) + // FIXME: add note with the def operand. + return error(Operands[I].Begin, + Twine("use of invalid tied-def operand index '") + + Twine(DefIdx) + "'; the operand #" + Twine(DefIdx) + + " isn't a defined register"); + // Check that the tied-def operand wasn't tied elsewhere. + for (const auto &TiedPair : TiedRegisterPairs) { + if (TiedPair.first == DefIdx) + return error(Operands[I].Begin, + Twine("the tied-def operand #") + Twine(DefIdx) + + " is already tied with another register operand"); + } + TiedRegisterPairs.push_back(std::make_pair(DefIdx, I)); + } + // FIXME: Verify that for non INLINEASM instructions, the def and use tied + // indices must be less than tied max. + for (const auto &TiedPair : TiedRegisterPairs) + MI.tieOperands(TiedPair.first, TiedPair.second); + return false; +} + +bool MIParser::parseRegisterOperand(MachineOperand &Dest, + Optional<unsigned> &TiedDefIdx, + bool IsDef) { unsigned Reg; unsigned Flags = IsDef ? RegState::Define : 0; while (Token.isRegisterFlag()) { @@ -385,10 +932,17 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) { if (parseSubRegisterIndex(SubReg)) return true; } + if ((Flags & RegState::Define) == 0 && consumeIfPresent(MIToken::lparen)) { + unsigned Idx; + if (parseRegisterTiedDefIndex(Idx)) + return true; + TiedDefIdx = Idx; + } Dest = MachineOperand::CreateReg( Reg, Flags & RegState::Define, Flags & RegState::Implicit, Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef, - /*isEarlyClobber=*/false, SubReg); + Flags & RegState::EarlyClobber, SubReg, Flags & RegState::Debug, + Flags & RegState::InternalRead); return false; } @@ -396,13 +950,55 @@ bool MIParser::parseImmediateOperand(MachineOperand &Dest) { assert(Token.is(MIToken::IntegerLiteral)); const APSInt &Int = Token.integerValue(); if (Int.getMinSignedBits() > 64) - // TODO: Replace this with an error when we can parse CIMM Machine Operands. 
- llvm_unreachable("Can't parse large integer literals yet!"); + return error("integer literal is too large to be an immediate operand"); Dest = MachineOperand::CreateImm(Int.getExtValue()); lex(); return false; } +bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue, + const Constant *&C) { + auto Source = StringValue.str(); // The source has to be null terminated. + SMDiagnostic Err; + C = parseConstantValue(Source.c_str(), Err, *MF.getFunction()->getParent(), + &IRSlots); + if (!C) + return error(Loc + Err.getColumnNo(), Err.getMessage()); + return false; +} + +bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) { + if (parseIRConstant(Loc, StringRef(Loc, Token.range().end() - Loc), C)) + return true; + lex(); + return false; +} + +bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::IntegerType)); + auto Loc = Token.location(); + lex(); + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected an integer literal"); + const Constant *C = nullptr; + if (parseIRConstant(Loc, C)) + return true; + Dest = MachineOperand::CreateCImm(cast<ConstantInt>(C)); + return false; +} + +bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) { + auto Loc = Token.location(); + lex(); + if (Token.isNot(MIToken::FloatingPointLiteral)) + return error("expected a floating point literal"); + const Constant *C = nullptr; + if (parseIRConstant(Loc, C)) + return true; + Dest = MachineOperand::CreateFPImm(cast<ConstantFP>(C)); + return false; +} + bool MIParser::getUnsigned(unsigned &Result) { assert(Token.hasIntegerValue() && "Expected a token with an integer value"); const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1; @@ -414,7 +1010,8 @@ bool MIParser::getUnsigned(unsigned &Result) { } bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) { - assert(Token.is(MIToken::MachineBasicBlock)); + assert(Token.is(MIToken::MachineBasicBlock) || + Token.is(MIToken::MachineBasicBlockLabel)); unsigned Number; if (getUnsigned(Number)) return true; @@ -438,16 +1035,66 @@ bool MIParser::parseMBBOperand(MachineOperand &Dest) { return false; } -bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) { +bool MIParser::parseStackFrameIndex(int &FI) { + assert(Token.is(MIToken::StackObject)); + unsigned ID; + if (getUnsigned(ID)) + return true; + auto ObjectInfo = PFS.StackObjectSlots.find(ID); + if (ObjectInfo == PFS.StackObjectSlots.end()) + return error(Twine("use of undefined stack object '%stack.") + Twine(ID) + + "'"); + StringRef Name; + if (const auto *Alloca = + MF.getFrameInfo()->getObjectAllocation(ObjectInfo->second)) + Name = Alloca->getName(); + if (!Token.stringValue().empty() && Token.stringValue() != Name) + return error(Twine("the name of the stack object '%stack.") + Twine(ID) + + "' isn't '" + Token.stringValue() + "'"); + lex(); + FI = ObjectInfo->second; + return false; +} + +bool MIParser::parseStackObjectOperand(MachineOperand &Dest) { + int FI; + if (parseStackFrameIndex(FI)) + return true; + Dest = MachineOperand::CreateFI(FI); + return false; +} + +bool MIParser::parseFixedStackFrameIndex(int &FI) { + assert(Token.is(MIToken::FixedStackObject)); + unsigned ID; + if (getUnsigned(ID)) + return true; + auto ObjectInfo = PFS.FixedStackObjectSlots.find(ID); + if (ObjectInfo == PFS.FixedStackObjectSlots.end()) + return error(Twine("use of undefined fixed stack object '%fixed-stack.") + + Twine(ID) + "'"); + lex(); + FI = ObjectInfo->second; + return false; +} + +bool 
MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) { + int FI; + if (parseFixedStackFrameIndex(FI)) + return true; + Dest = MachineOperand::CreateFI(FI); + return false; +} + +bool MIParser::parseGlobalValue(GlobalValue *&GV) { switch (Token.kind()) { case MIToken::NamedGlobalValue: { - auto Name = Token.stringValue(); const Module *M = MF.getFunction()->getParent(); - if (const auto *GV = M->getNamedValue(Name)) { - Dest = MachineOperand::CreateGA(GV, /*Offset=*/0); - break; - } - return error(Twine("use of undefined global value '@") + Name + "'"); + GV = M->getNamedValue(Token.stringValue()); + if (!GV) + return error(Twine("use of undefined global value '") + Token.range() + + "'"); + break; } case MIToken::GlobalValue: { unsigned GVIdx; @@ -456,36 +1103,323 @@ bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) { if (GVIdx >= IRSlots.GlobalValues.size()) return error(Twine("use of undefined global value '@") + Twine(GVIdx) + "'"); - Dest = MachineOperand::CreateGA(IRSlots.GlobalValues[GVIdx], - /*Offset=*/0); + GV = IRSlots.GlobalValues[GVIdx]; break; } default: llvm_unreachable("The current token should be a global value"); } - // TODO: Parse offset and target flags. + return false; +} + +bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) { + GlobalValue *GV = nullptr; + if (parseGlobalValue(GV)) + return true; + lex(); + Dest = MachineOperand::CreateGA(GV, /*Offset=*/0); + if (parseOperandsOffset(Dest)) + return true; + return false; +} + +bool MIParser::parseConstantPoolIndexOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::ConstantPoolItem)); + unsigned ID; + if (getUnsigned(ID)) + return true; + auto ConstantInfo = PFS.ConstantPoolSlots.find(ID); + if (ConstantInfo == PFS.ConstantPoolSlots.end()) + return error("use of undefined constant '%const." + Twine(ID) + "'"); + lex(); + Dest = MachineOperand::CreateCPI(ID, /*Offset=*/0); + if (parseOperandsOffset(Dest)) + return true; + return false; +} + +bool MIParser::parseJumpTableIndexOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::JumpTableIndex)); + unsigned ID; + if (getUnsigned(ID)) + return true; + auto JumpTableEntryInfo = PFS.JumpTableSlots.find(ID); + if (JumpTableEntryInfo == PFS.JumpTableSlots.end()) + return error("use of undefined jump table '%jump-table." + Twine(ID) + "'"); + lex(); + Dest = MachineOperand::CreateJTI(JumpTableEntryInfo->second); + return false; +} + +bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::ExternalSymbol)); + const char *Symbol = MF.createExternalSymbolName(Token.stringValue()); + lex(); + Dest = MachineOperand::CreateES(Symbol); + if (parseOperandsOffset(Dest)) + return true; + return false; +} + +bool MIParser::parseMDNode(MDNode *&Node) { + assert(Token.is(MIToken::exclaim)); + auto Loc = Token.location(); + lex(); + if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned()) + return error("expected metadata id after '!'"); + unsigned ID; + if (getUnsigned(ID)) + return true; + auto NodeInfo = IRSlots.MetadataNodes.find(ID); + if (NodeInfo == IRSlots.MetadataNodes.end()) + return error(Loc, "use of undefined metadata '!" 
+ Twine(ID) + "'"); + lex(); + Node = NodeInfo->second.get(); + return false; +} + +bool MIParser::parseMetadataOperand(MachineOperand &Dest) { + MDNode *Node = nullptr; + if (parseMDNode(Node)) + return true; + Dest = MachineOperand::CreateMetadata(Node); + return false; +} + +bool MIParser::parseCFIOffset(int &Offset) { + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected a cfi offset"); + if (Token.integerValue().getMinSignedBits() > 32) + return error("expected a 32 bit integer (the cfi offset is too large)"); + Offset = (int)Token.integerValue().getExtValue(); + lex(); + return false; +} + +bool MIParser::parseCFIRegister(unsigned &Reg) { + if (Token.isNot(MIToken::NamedRegister)) + return error("expected a cfi register"); + unsigned LLVMReg; + if (parseRegister(LLVMReg)) + return true; + const auto *TRI = MF.getSubtarget().getRegisterInfo(); + assert(TRI && "Expected target register info"); + int DwarfReg = TRI->getDwarfRegNum(LLVMReg, true); + if (DwarfReg < 0) + return error("invalid DWARF register"); + Reg = (unsigned)DwarfReg; + lex(); + return false; +} + +bool MIParser::parseCFIOperand(MachineOperand &Dest) { + auto Kind = Token.kind(); + lex(); + auto &MMI = MF.getMMI(); + int Offset; + unsigned Reg; + unsigned CFIIndex; + switch (Kind) { + case MIToken::kw_cfi_same_value: + if (parseCFIRegister(Reg)) + return true; + CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createSameValue(nullptr, Reg)); + break; + case MIToken::kw_cfi_offset: + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIOffset(Offset)) + return true; + CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset)); + break; + case MIToken::kw_cfi_def_cfa_register: + if (parseCFIRegister(Reg)) + return true; + CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); + break; + case MIToken::kw_cfi_def_cfa_offset: + if (parseCFIOffset(Offset)) + return true; + // NB: MCCFIInstruction::createDefCfaOffset negates the offset. + CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, -Offset)); + break; + case MIToken::kw_cfi_def_cfa: + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIOffset(Offset)) + return true; + // NB: MCCFIInstruction::createDefCfa negates the offset. + CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset)); + break; + default: + // TODO: Parse the other CFI operands. 
+ llvm_unreachable("The current token should be a cfi operand"); + } + Dest = MachineOperand::CreateCFIIndex(CFIIndex); + return false; +} + +bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) { + switch (Token.kind()) { + case MIToken::NamedIRBlock: { + BB = dyn_cast_or_null<BasicBlock>( + F.getValueSymbolTable().lookup(Token.stringValue())); + if (!BB) + return error(Twine("use of undefined IR block '") + Token.range() + "'"); + break; + } + case MIToken::IRBlock: { + unsigned SlotNumber = 0; + if (getUnsigned(SlotNumber)) + return true; + BB = const_cast<BasicBlock *>(getIRBlock(SlotNumber, F)); + if (!BB) + return error(Twine("use of undefined IR block '%ir-block.") + + Twine(SlotNumber) + "'"); + break; + } + default: + llvm_unreachable("The current token should be an IR block reference"); + } + return false; +} + +bool MIParser::parseBlockAddressOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_blockaddress)); + lex(); + if (expectAndConsume(MIToken::lparen)) + return true; + if (Token.isNot(MIToken::GlobalValue) && + Token.isNot(MIToken::NamedGlobalValue)) + return error("expected a global value"); + GlobalValue *GV = nullptr; + if (parseGlobalValue(GV)) + return true; + auto *F = dyn_cast<Function>(GV); + if (!F) + return error("expected an IR function reference"); + lex(); + if (expectAndConsume(MIToken::comma)) + return true; + BasicBlock *BB = nullptr; + if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock)) + return error("expected an IR block reference"); + if (parseIRBlock(BB, *F)) + return true; + lex(); + if (expectAndConsume(MIToken::rparen)) + return true; + Dest = MachineOperand::CreateBA(BlockAddress::get(F, BB), /*Offset=*/0); + if (parseOperandsOffset(Dest)) + return true; + return false; +} + +bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_target_index)); + lex(); + if (expectAndConsume(MIToken::lparen)) + return true; + if (Token.isNot(MIToken::Identifier)) + return error("expected the name of the target index"); + int Index = 0; + if (getTargetIndex(Token.stringValue(), Index)) + return error("use of undefined target index '" + Token.stringValue() + "'"); lex(); + if (expectAndConsume(MIToken::rparen)) + return true; + Dest = MachineOperand::CreateTargetIndex(unsigned(Index), /*Offset=*/0); + if (parseOperandsOffset(Dest)) + return true; + return false; +} + +bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_liveout)); + const auto *TRI = MF.getSubtarget().getRegisterInfo(); + assert(TRI && "Expected target register info"); + uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs()); + lex(); + if (expectAndConsume(MIToken::lparen)) + return true; + while (true) { + if (Token.isNot(MIToken::NamedRegister)) + return error("expected a named register"); + unsigned Reg = 0; + if (parseRegister(Reg)) + return true; + lex(); + Mask[Reg / 32] |= 1U << (Reg % 32); + // TODO: Report an error if the same register is used more than once. 
+ if (Token.isNot(MIToken::comma)) + break; + lex(); + } + if (expectAndConsume(MIToken::rparen)) + return true; + Dest = MachineOperand::CreateRegLiveOut(Mask); return false; } -bool MIParser::parseMachineOperand(MachineOperand &Dest) { +bool MIParser::parseMachineOperand(MachineOperand &Dest, + Optional<unsigned> &TiedDefIdx) { switch (Token.kind()) { case MIToken::kw_implicit: case MIToken::kw_implicit_define: + case MIToken::kw_def: case MIToken::kw_dead: case MIToken::kw_killed: case MIToken::kw_undef: + case MIToken::kw_internal: + case MIToken::kw_early_clobber: + case MIToken::kw_debug_use: case MIToken::underscore: case MIToken::NamedRegister: case MIToken::VirtualRegister: - return parseRegisterOperand(Dest); + return parseRegisterOperand(Dest, TiedDefIdx); case MIToken::IntegerLiteral: return parseImmediateOperand(Dest); + case MIToken::IntegerType: + return parseTypedImmediateOperand(Dest); + case MIToken::kw_half: + case MIToken::kw_float: + case MIToken::kw_double: + case MIToken::kw_x86_fp80: + case MIToken::kw_fp128: + case MIToken::kw_ppc_fp128: + return parseFPImmediateOperand(Dest); case MIToken::MachineBasicBlock: return parseMBBOperand(Dest); + case MIToken::StackObject: + return parseStackObjectOperand(Dest); + case MIToken::FixedStackObject: + return parseFixedStackObjectOperand(Dest); case MIToken::GlobalValue: case MIToken::NamedGlobalValue: return parseGlobalAddressOperand(Dest); + case MIToken::ConstantPoolItem: + return parseConstantPoolIndexOperand(Dest); + case MIToken::JumpTableIndex: + return parseJumpTableIndexOperand(Dest); + case MIToken::ExternalSymbol: + return parseExternalSymbolOperand(Dest); + case MIToken::exclaim: + return parseMetadataOperand(Dest); + case MIToken::kw_cfi_same_value: + case MIToken::kw_cfi_offset: + case MIToken::kw_cfi_def_cfa_register: + case MIToken::kw_cfi_def_cfa_offset: + case MIToken::kw_cfi_def_cfa: + return parseCFIOperand(Dest); + case MIToken::kw_blockaddress: + return parseBlockAddressOperand(Dest); + case MIToken::kw_target_index: + return parseTargetIndexOperand(Dest); + case MIToken::kw_liveout: + return parseLiveoutRegisterMaskOperand(Dest); case MIToken::Error: return true; case MIToken::Identifier: @@ -496,12 +1430,314 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest) { } // fallthrough default: - // TODO: parse the other machine operands. + // FIXME: Parse the MCSymbol machine operand. return error("expected a machine operand"); } return false; } +bool MIParser::parseMachineOperandAndTargetFlags( + MachineOperand &Dest, Optional<unsigned> &TiedDefIdx) { + unsigned TF = 0; + bool HasTargetFlags = false; + if (Token.is(MIToken::kw_target_flags)) { + HasTargetFlags = true; + lex(); + if (expectAndConsume(MIToken::lparen)) + return true; + if (Token.isNot(MIToken::Identifier)) + return error("expected the name of the target flag"); + if (getDirectTargetFlag(Token.stringValue(), TF)) { + if (getBitmaskTargetFlag(Token.stringValue(), TF)) + return error("use of undefined target flag '" + Token.stringValue() + + "'"); + } + lex(); + while (Token.is(MIToken::comma)) { + lex(); + if (Token.isNot(MIToken::Identifier)) + return error("expected the name of the target flag"); + unsigned BitFlag = 0; + if (getBitmaskTargetFlag(Token.stringValue(), BitFlag)) + return error("use of undefined target flag '" + Token.stringValue() + + "'"); + // TODO: Report an error when using a duplicate bit target flag. 
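Target flags are parsed as a prefix wrapper around any non-register operand: a direct flag name, optionally followed by additional bitmask flag names, all resolved through the target's serializable-flag tables. Schematically (the flag names are target-specific and purely illustrative):

    target-flags(<direct-flag>, <bitmask-flag>) @some_global + 16

parseMachineOperandAndTargetFlags() rejects the wrapper on register operands ("register operands can't have target flags") and otherwise applies the accumulated mask with setTargetFlags().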
+ TF |= BitFlag; + lex(); + } + if (expectAndConsume(MIToken::rparen)) + return true; + } + auto Loc = Token.location(); + if (parseMachineOperand(Dest, TiedDefIdx)) + return true; + if (!HasTargetFlags) + return false; + if (Dest.isReg()) + return error(Loc, "register operands can't have target flags"); + Dest.setTargetFlags(TF); + return false; +} + +bool MIParser::parseOffset(int64_t &Offset) { + if (Token.isNot(MIToken::plus) && Token.isNot(MIToken::minus)) + return false; + StringRef Sign = Token.range(); + bool IsNegative = Token.is(MIToken::minus); + lex(); + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected an integer literal after '" + Sign + "'"); + if (Token.integerValue().getMinSignedBits() > 64) + return error("expected 64-bit integer (too large)"); + Offset = Token.integerValue().getExtValue(); + if (IsNegative) + Offset = -Offset; + lex(); + return false; +} + +bool MIParser::parseAlignment(unsigned &Alignment) { + assert(Token.is(MIToken::kw_align)); + lex(); + if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned()) + return error("expected an integer literal after 'align'"); + if (getUnsigned(Alignment)) + return true; + lex(); + return false; +} + +bool MIParser::parseOperandsOffset(MachineOperand &Op) { + int64_t Offset = 0; + if (parseOffset(Offset)) + return true; + Op.setOffset(Offset); + return false; +} + +bool MIParser::parseIRValue(const Value *&V) { + switch (Token.kind()) { + case MIToken::NamedIRValue: { + V = MF.getFunction()->getValueSymbolTable().lookup(Token.stringValue()); + break; + } + case MIToken::IRValue: { + unsigned SlotNumber = 0; + if (getUnsigned(SlotNumber)) + return true; + V = getIRValue(SlotNumber); + break; + } + case MIToken::NamedGlobalValue: + case MIToken::GlobalValue: { + GlobalValue *GV = nullptr; + if (parseGlobalValue(GV)) + return true; + V = GV; + break; + } + case MIToken::QuotedIRValue: { + const Constant *C = nullptr; + if (parseIRConstant(Token.location(), Token.stringValue(), C)) + return true; + V = C; + break; + } + default: + llvm_unreachable("The current token should be an IR block reference"); + } + if (!V) + return error(Twine("use of undefined IR value '") + Token.range() + "'"); + return false; +} + +bool MIParser::getUint64(uint64_t &Result) { + assert(Token.hasIntegerValue()); + if (Token.integerValue().getActiveBits() > 64) + return error("expected 64-bit integer (too large)"); + Result = Token.integerValue().getZExtValue(); + return false; +} + +bool MIParser::parseMemoryOperandFlag(unsigned &Flags) { + const unsigned OldFlags = Flags; + switch (Token.kind()) { + case MIToken::kw_volatile: + Flags |= MachineMemOperand::MOVolatile; + break; + case MIToken::kw_non_temporal: + Flags |= MachineMemOperand::MONonTemporal; + break; + case MIToken::kw_invariant: + Flags |= MachineMemOperand::MOInvariant; + break; + // TODO: parse the target specific memory operand flags. + default: + llvm_unreachable("The current token should be a memory operand flag"); + } + if (OldFlags == Flags) + // We know that the same flag is specified more than once when the flags + // weren't modified. 
+ return error("duplicate '" + Token.stringValue() + "' memory operand flag"); + lex(); + return false; +} + +bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) { + switch (Token.kind()) { + case MIToken::kw_stack: + PSV = MF.getPSVManager().getStack(); + break; + case MIToken::kw_got: + PSV = MF.getPSVManager().getGOT(); + break; + case MIToken::kw_jump_table: + PSV = MF.getPSVManager().getJumpTable(); + break; + case MIToken::kw_constant_pool: + PSV = MF.getPSVManager().getConstantPool(); + break; + case MIToken::FixedStackObject: { + int FI; + if (parseFixedStackFrameIndex(FI)) + return true; + PSV = MF.getPSVManager().getFixedStack(FI); + // The token was already consumed, so use return here instead of break. + return false; + } + case MIToken::kw_call_entry: { + lex(); + switch (Token.kind()) { + case MIToken::GlobalValue: + case MIToken::NamedGlobalValue: { + GlobalValue *GV = nullptr; + if (parseGlobalValue(GV)) + return true; + PSV = MF.getPSVManager().getGlobalValueCallEntry(GV); + break; + } + case MIToken::ExternalSymbol: + PSV = MF.getPSVManager().getExternalSymbolCallEntry( + MF.createExternalSymbolName(Token.stringValue())); + break; + default: + return error( + "expected a global value or an external symbol after 'call-entry'"); + } + break; + } + default: + llvm_unreachable("The current token should be pseudo source value"); + } + lex(); + return false; +} + +bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) { + if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) || + Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) || + Token.is(MIToken::FixedStackObject) || Token.is(MIToken::kw_call_entry)) { + const PseudoSourceValue *PSV = nullptr; + if (parseMemoryPseudoSourceValue(PSV)) + return true; + int64_t Offset = 0; + if (parseOffset(Offset)) + return true; + Dest = MachinePointerInfo(PSV, Offset); + return false; + } + if (Token.isNot(MIToken::NamedIRValue) && Token.isNot(MIToken::IRValue) && + Token.isNot(MIToken::GlobalValue) && + Token.isNot(MIToken::NamedGlobalValue) && + Token.isNot(MIToken::QuotedIRValue)) + return error("expected an IR value reference"); + const Value *V = nullptr; + if (parseIRValue(V)) + return true; + if (!V->getType()->isPointerTy()) + return error("expected a pointer IR value"); + lex(); + int64_t Offset = 0; + if (parseOffset(Offset)) + return true; + Dest = MachinePointerInfo(V, Offset); + return false; +} + +bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { + if (expectAndConsume(MIToken::lparen)) + return true; + unsigned Flags = 0; + while (Token.isMemoryOperandFlag()) { + if (parseMemoryOperandFlag(Flags)) + return true; + } + if (Token.isNot(MIToken::Identifier) || + (Token.stringValue() != "load" && Token.stringValue() != "store")) + return error("expected 'load' or 'store' memory operation"); + if (Token.stringValue() == "load") + Flags |= MachineMemOperand::MOLoad; + else + Flags |= MachineMemOperand::MOStore; + lex(); + + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected the size integer literal after memory operation"); + uint64_t Size; + if (getUint64(Size)) + return true; + lex(); + + const char *Word = Flags & MachineMemOperand::MOLoad ? 
"from" : "into"; + if (Token.isNot(MIToken::Identifier) || Token.stringValue() != Word) + return error(Twine("expected '") + Word + "'"); + lex(); + + MachinePointerInfo Ptr = MachinePointerInfo(); + if (parseMachinePointerInfo(Ptr)) + return true; + unsigned BaseAlignment = Size; + AAMDNodes AAInfo; + MDNode *Range = nullptr; + while (consumeIfPresent(MIToken::comma)) { + switch (Token.kind()) { + case MIToken::kw_align: + if (parseAlignment(BaseAlignment)) + return true; + break; + case MIToken::md_tbaa: + lex(); + if (parseMDNode(AAInfo.TBAA)) + return true; + break; + case MIToken::md_alias_scope: + lex(); + if (parseMDNode(AAInfo.Scope)) + return true; + break; + case MIToken::md_noalias: + lex(); + if (parseMDNode(AAInfo.NoAlias)) + return true; + break; + case MIToken::md_range: + lex(); + if (parseMDNode(Range)) + return true; + break; + // TODO: Report an error on duplicate metadata nodes. + default: + return error("expected 'align' or '!tbaa' or '!alias.scope' or " + "'!noalias' or '!range'"); + } + } + if (expectAndConsume(MIToken::rparen)) + return true; + Dest = + MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range); + return false; +} + void MIParser::initNames2InstrOpCodes() { if (!Names2InstrOpCodes.empty()) return; @@ -583,18 +1819,162 @@ unsigned MIParser::getSubRegIndex(StringRef Name) { return SubRegInfo->getValue(); } -bool llvm::parseMachineInstr(MachineInstr *&MI, SourceMgr &SM, - MachineFunction &MF, StringRef Src, - const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, SMDiagnostic &Error) { - return MIParser(SM, MF, Error, Src, PFS, IRSlots).parse(MI); +static void initSlots2BasicBlocks( + const Function &F, + DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) { + ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false); + MST.incorporateFunction(F); + for (auto &BB : F) { + if (BB.hasName()) + continue; + int Slot = MST.getLocalSlot(&BB); + if (Slot == -1) + continue; + Slots2BasicBlocks.insert(std::make_pair(unsigned(Slot), &BB)); + } +} + +static const BasicBlock *getIRBlockFromSlot( + unsigned Slot, + const DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) { + auto BlockInfo = Slots2BasicBlocks.find(Slot); + if (BlockInfo == Slots2BasicBlocks.end()) + return nullptr; + return BlockInfo->second; +} + +const BasicBlock *MIParser::getIRBlock(unsigned Slot) { + if (Slots2BasicBlocks.empty()) + initSlots2BasicBlocks(*MF.getFunction(), Slots2BasicBlocks); + return getIRBlockFromSlot(Slot, Slots2BasicBlocks); +} + +const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) { + if (&F == MF.getFunction()) + return getIRBlock(Slot); + DenseMap<unsigned, const BasicBlock *> CustomSlots2BasicBlocks; + initSlots2BasicBlocks(F, CustomSlots2BasicBlocks); + return getIRBlockFromSlot(Slot, CustomSlots2BasicBlocks); +} + +static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST, + DenseMap<unsigned, const Value *> &Slots2Values) { + int Slot = MST.getLocalSlot(V); + if (Slot == -1) + return; + Slots2Values.insert(std::make_pair(unsigned(Slot), V)); +} + +/// Creates the mapping from slot numbers to function's unnamed IR values. 
+static void initSlots2Values(const Function &F, + DenseMap<unsigned, const Value *> &Slots2Values) { + ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false); + MST.incorporateFunction(F); + for (const auto &Arg : F.args()) + mapValueToSlot(&Arg, MST, Slots2Values); + for (const auto &BB : F) { + mapValueToSlot(&BB, MST, Slots2Values); + for (const auto &I : BB) + mapValueToSlot(&I, MST, Slots2Values); + } +} + +const Value *MIParser::getIRValue(unsigned Slot) { + if (Slots2Values.empty()) + initSlots2Values(*MF.getFunction(), Slots2Values); + auto ValueInfo = Slots2Values.find(Slot); + if (ValueInfo == Slots2Values.end()) + return nullptr; + return ValueInfo->second; +} + +void MIParser::initNames2TargetIndices() { + if (!Names2TargetIndices.empty()) + return; + const auto *TII = MF.getSubtarget().getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Indices = TII->getSerializableTargetIndices(); + for (const auto &I : Indices) + Names2TargetIndices.insert(std::make_pair(StringRef(I.second), I.first)); +} + +bool MIParser::getTargetIndex(StringRef Name, int &Index) { + initNames2TargetIndices(); + auto IndexInfo = Names2TargetIndices.find(Name); + if (IndexInfo == Names2TargetIndices.end()) + return true; + Index = IndexInfo->second; + return false; +} + +void MIParser::initNames2DirectTargetFlags() { + if (!Names2DirectTargetFlags.empty()) + return; + const auto *TII = MF.getSubtarget().getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Flags = TII->getSerializableDirectMachineOperandTargetFlags(); + for (const auto &I : Flags) + Names2DirectTargetFlags.insert( + std::make_pair(StringRef(I.second), I.first)); +} + +bool MIParser::getDirectTargetFlag(StringRef Name, unsigned &Flag) { + initNames2DirectTargetFlags(); + auto FlagInfo = Names2DirectTargetFlags.find(Name); + if (FlagInfo == Names2DirectTargetFlags.end()) + return true; + Flag = FlagInfo->second; + return false; +} + +void MIParser::initNames2BitmaskTargetFlags() { + if (!Names2BitmaskTargetFlags.empty()) + return; + const auto *TII = MF.getSubtarget().getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Flags = TII->getSerializableBitmaskMachineOperandTargetFlags(); + for (const auto &I : Flags) + Names2BitmaskTargetFlags.insert( + std::make_pair(StringRef(I.second), I.first)); +} + +bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) { + initNames2BitmaskTargetFlags(); + auto FlagInfo = Names2BitmaskTargetFlags.find(Name); + if (FlagInfo == Names2BitmaskTargetFlags.end()) + return true; + Flag = FlagInfo->second; + return false; +} + +bool llvm::parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src, + PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error) { + SourceMgr SM; + SM.AddNewSourceBuffer( + MemoryBuffer::getMemBuffer(Src, "", /*RequiresNullTerminator=*/false), + SMLoc()); + return MIParser(SM, MF, Error, Src, PFS, IRSlots) + .parseBasicBlockDefinitions(PFS.MBBSlots); +} + +bool llvm::parseMachineInstructions(MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error) { + SourceMgr SM; + SM.AddNewSourceBuffer( + MemoryBuffer::getMemBuffer(Src, "", /*RequiresNullTerminator=*/false), + SMLoc()); + return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseBasicBlocks(); } bool llvm::parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM, MachineFunction &MF, StringRef Src, const 
PerFunctionMIParsingState &PFS, const SlotMapping &IRSlots, SMDiagnostic &Error) { - return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseMBB(MBB); + return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseStandaloneMBB(MBB); } bool llvm::parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM, @@ -602,5 +1982,30 @@ bool llvm::parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM, const PerFunctionMIParsingState &PFS, const SlotMapping &IRSlots, SMDiagnostic &Error) { - return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseNamedRegister(Reg); + return MIParser(SM, MF, Error, Src, PFS, IRSlots) + .parseStandaloneNamedRegister(Reg); +} + +bool llvm::parseVirtualRegisterReference(unsigned &Reg, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error) { + return MIParser(SM, MF, Error, Src, PFS, IRSlots) + .parseStandaloneVirtualRegister(Reg); +} + +bool llvm::parseStackObjectReference(int &FI, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error) { + return MIParser(SM, MF, Error, Src, PFS, IRSlots) + .parseStandaloneStackObject(FI); +} + +bool llvm::parseMDNode(MDNode *&Node, SourceMgr &SM, MachineFunction &MF, + StringRef Src, const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, SMDiagnostic &Error) { + return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseStandaloneMDNode(Node); } diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h index fca4c4e6f885..8aef704ab36c 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h @@ -19,9 +19,11 @@ namespace llvm { +class BasicBlock; class MachineBasicBlock; class MachineInstr; class MachineFunction; +class MDNode; struct SlotMapping; class SMDiagnostic; class SourceMgr; @@ -29,11 +31,42 @@ class SourceMgr; struct PerFunctionMIParsingState { DenseMap<unsigned, MachineBasicBlock *> MBBSlots; DenseMap<unsigned, unsigned> VirtualRegisterSlots; + DenseMap<unsigned, int> FixedStackObjectSlots; + DenseMap<unsigned, int> StackObjectSlots; + DenseMap<unsigned, unsigned> ConstantPoolSlots; + DenseMap<unsigned, unsigned> JumpTableSlots; }; -bool parseMachineInstr(MachineInstr *&MI, SourceMgr &SM, MachineFunction &MF, - StringRef Src, const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, SMDiagnostic &Error); +/// Parse the machine basic block definitions, and skip the machine +/// instructions. +/// +/// This function runs the first parsing pass on the machine function's body. +/// It parses only the machine basic block definitions and creates the machine +/// basic blocks in the given machine function. +/// +/// The machine instructions aren't parsed during the first pass because all +/// the machine basic blocks aren't defined yet - this makes it impossible to +/// resolve the machine basic block references. +/// +/// Return true if an error occurred. +bool parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src, + PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error); + +/// Parse the machine instructions. +/// +/// This function runs the second parsing pass on the machine function's body. +/// It skips the machine basic block definitions and parses only the machine +/// instructions and basic block attributes like liveins and successors. 
+/// +/// The second parsing pass assumes that the first parsing pass already ran +/// on the given source string. +/// +/// Return true if an error occurred. +bool parseMachineInstructions(MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, SMDiagnostic &Error); bool parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM, MachineFunction &MF, StringRef Src, @@ -46,6 +79,21 @@ bool parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM, const SlotMapping &IRSlots, SMDiagnostic &Error); +bool parseVirtualRegisterReference(unsigned &Reg, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error); + +bool parseStackObjectReference(int &FI, SourceMgr &SM, MachineFunction &MF, + StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, SMDiagnostic &Error); + +bool parseMDNode(MDNode *&Node, SourceMgr &SM, MachineFunction &MF, + StringRef Src, const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, SMDiagnostic &Error); + } // end namespace llvm #endif diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 16b0e1655891..422efbc5ce57 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -20,8 +20,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/IR/BasicBlock.h" @@ -95,30 +97,53 @@ public: /// Return true if error occurred. bool initializeMachineFunction(MachineFunction &MF); - /// Initialize the machine basic block using it's YAML representation. - /// - /// Return true if an error occurred. 
- bool initializeMachineBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, - const yaml::MachineBasicBlock &YamlMBB, - const PerFunctionMIParsingState &PFS); + bool initializeRegisterInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF, + PerFunctionMIParsingState &PFS); + + void inferRegisterInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF); + + bool initializeFrameInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF, + PerFunctionMIParsingState &PFS); + + bool parseCalleeSavedRegister(MachineFunction &MF, + PerFunctionMIParsingState &PFS, + std::vector<CalleeSavedInfo> &CSIInfo, + const yaml::StringValue &RegisterSource, + int FrameIdx); + + bool parseStackObjectsDebugInfo(MachineFunction &MF, + PerFunctionMIParsingState &PFS, + const yaml::MachineStackObject &Object, + int FrameIdx); - bool - initializeRegisterInfo(const MachineFunction &MF, - MachineRegisterInfo &RegInfo, - const yaml::MachineFunction &YamlMF, - DenseMap<unsigned, unsigned> &VirtualRegisterSlots); + bool initializeConstantPool(MachineConstantPool &ConstantPool, + const yaml::MachineFunction &YamlMF, + const MachineFunction &MF, + DenseMap<unsigned, unsigned> &ConstantPoolSlots); - bool initializeFrameInfo(MachineFrameInfo &MFI, - const yaml::MachineFunction &YamlMF); + bool initializeJumpTableInfo(MachineFunction &MF, + const yaml::MachineJumpTable &YamlJTI, + PerFunctionMIParsingState &PFS); private: + bool parseMDNode(MDNode *&Node, const yaml::StringValue &Source, + MachineFunction &MF, const PerFunctionMIParsingState &PFS); + + bool parseMBBReference(MachineBasicBlock *&MBB, + const yaml::StringValue &Source, MachineFunction &MF, + const PerFunctionMIParsingState &PFS); + /// Return a MIR diagnostic converted from an MI string diagnostic. SMDiagnostic diagFromMIStringDiag(const SMDiagnostic &Error, SMRange SourceRange); - /// Return a MIR diagnostic converted from an LLVM assembly diagnostic. - SMDiagnostic diagFromLLVMAssemblyDiag(const SMDiagnostic &Error, - SMRange SourceRange); + /// Return a MIR diagnostic converted from a diagnostic located in a YAML + /// block scalar string. + SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error, + SMRange SourceRange); /// Create an empty function with the given name. 
void createDummyFunction(StringRef Name, Module &M); @@ -200,7 +225,7 @@ std::unique_ptr<Module> MIRParserImpl::parse() { M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error, Context, &IRSlots); if (!M) { - reportDiagnostic(diagFromLLVMAssemblyDiag(Error, BSN->getSourceRange())); + reportDiagnostic(diagFromBlockStringDiag(Error, BSN->getSourceRange())); return M; } In.nextDocument(); @@ -261,88 +286,56 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); MF.setHasInlineAsm(YamlMF.HasInlineAsm); PerFunctionMIParsingState PFS; - if (initializeRegisterInfo(MF, MF.getRegInfo(), YamlMF, - PFS.VirtualRegisterSlots)) - return true; - if (initializeFrameInfo(*MF.getFrameInfo(), YamlMF)) + if (initializeRegisterInfo(MF, YamlMF, PFS)) return true; - - const auto &F = *MF.getFunction(); - for (const auto &YamlMBB : YamlMF.BasicBlocks) { - const BasicBlock *BB = nullptr; - const yaml::StringValue &Name = YamlMBB.Name; - if (!Name.Value.empty()) { - BB = dyn_cast_or_null<BasicBlock>( - F.getValueSymbolTable().lookup(Name.Value)); - if (!BB) - return error(Name.SourceRange.Start, - Twine("basic block '") + Name.Value + - "' is not defined in the function '" + MF.getName() + - "'"); - } - auto *MBB = MF.CreateMachineBasicBlock(BB); - MF.insert(MF.end(), MBB); - bool WasInserted = - PFS.MBBSlots.insert(std::make_pair(YamlMBB.ID, MBB)).second; - if (!WasInserted) - return error(Twine("redefinition of machine basic block with id #") + - Twine(YamlMBB.ID)); - } - - if (YamlMF.BasicBlocks.empty()) - return error(Twine("machine function '") + Twine(MF.getName()) + - "' requires at least one machine basic block in its body"); - // Initialize the machine basic blocks after creating them all so that the - // machine instructions parser can resolve the MBB references. - unsigned I = 0; - for (const auto &YamlMBB : YamlMF.BasicBlocks) { - if (initializeMachineBasicBlock(MF, *MF.getBlockNumbered(I++), YamlMBB, - PFS)) + if (!YamlMF.Constants.empty()) { + auto *ConstantPool = MF.getConstantPool(); + assert(ConstantPool && "Constant pool must be created"); + if (initializeConstantPool(*ConstantPool, YamlMF, MF, + PFS.ConstantPoolSlots)) return true; } - return false; -} -bool MIRParserImpl::initializeMachineBasicBlock( - MachineFunction &MF, MachineBasicBlock &MBB, - const yaml::MachineBasicBlock &YamlMBB, - const PerFunctionMIParsingState &PFS) { - MBB.setAlignment(YamlMBB.Alignment); - if (YamlMBB.AddressTaken) - MBB.setHasAddressTaken(); - MBB.setIsLandingPad(YamlMBB.IsLandingPad); SMDiagnostic Error; - // Parse the successors. - for (const auto &MBBSource : YamlMBB.Successors) { - MachineBasicBlock *SuccMBB = nullptr; - if (parseMBBReference(SuccMBB, SM, MF, MBBSource.Value, PFS, IRSlots, - Error)) - return error(Error, MBBSource.SourceRange); - // TODO: Report an error when adding the same successor more than once. - MBB.addSuccessor(SuccMBB); - } - // Parse the liveins. - for (const auto &LiveInSource : YamlMBB.LiveIns) { - unsigned Reg = 0; - if (parseNamedRegisterReference(Reg, SM, MF, LiveInSource.Value, PFS, - IRSlots, Error)) - return error(Error, LiveInSource.SourceRange); - MBB.addLiveIn(Reg); + if (parseMachineBasicBlockDefinitions(MF, YamlMF.Body.Value.Value, PFS, + IRSlots, Error)) { + reportDiagnostic( + diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange)); + return true; } - // Parse the instructions. 
- for (const auto &MISource : YamlMBB.Instructions) { - MachineInstr *MI = nullptr; - if (parseMachineInstr(MI, SM, MF, MISource.Value, PFS, IRSlots, Error)) - return error(Error, MISource.SourceRange); - MBB.insert(MBB.end(), MI); + + if (MF.empty()) + return error(Twine("machine function '") + Twine(MF.getName()) + + "' requires at least one machine basic block in its body"); + // Initialize the frame information after creating all the MBBs so that the + // MBB references in the frame information can be resolved. + if (initializeFrameInfo(MF, YamlMF, PFS)) + return true; + // Initialize the jump table after creating all the MBBs so that the MBB + // references can be resolved. + if (!YamlMF.JumpTableInfo.Entries.empty() && + initializeJumpTableInfo(MF, YamlMF.JumpTableInfo, PFS)) + return true; + // Parse the machine instructions after creating all of the MBBs so that the + // parser can resolve the MBB references. + if (parseMachineInstructions(MF, YamlMF.Body.Value.Value, PFS, IRSlots, + Error)) { + reportDiagnostic( + diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange)); + return true; } + inferRegisterInfo(MF, YamlMF); + // FIXME: This is a temporary workaround until the reserved registers can be + // serialized. + MF.getRegInfo().freezeReservedRegs(MF); + MF.verify(); return false; } -bool MIRParserImpl::initializeRegisterInfo( - const MachineFunction &MF, MachineRegisterInfo &RegInfo, - const yaml::MachineFunction &YamlMF, - DenseMap<unsigned, unsigned> &VirtualRegisterSlots) { +bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF, + PerFunctionMIParsingState &PFS) { + MachineRegisterInfo &RegInfo = MF.getRegInfo(); assert(RegInfo.isSSA()); if (!YamlMF.IsSSA) RegInfo.leaveSSA(); @@ -351,6 +344,7 @@ bool MIRParserImpl::initializeRegisterInfo( RegInfo.invalidateLiveness(); RegInfo.enableSubRegLiveness(YamlMF.TracksSubRegLiveness); + SMDiagnostic Error; // Parse the virtual register information. for (const auto &VReg : YamlMF.VirtualRegisters) { const auto *RC = getRegClass(MF, VReg.Class.Value); @@ -359,15 +353,71 @@ bool MIRParserImpl::initializeRegisterInfo( Twine("use of undefined register class '") + VReg.Class.Value + "'"); unsigned Reg = RegInfo.createVirtualRegister(RC); - // TODO: Report an error when the same virtual register with the same ID is - // redefined. - VirtualRegisterSlots.insert(std::make_pair(VReg.ID, Reg)); + if (!PFS.VirtualRegisterSlots.insert(std::make_pair(VReg.ID.Value, Reg)) + .second) + return error(VReg.ID.SourceRange.Start, + Twine("redefinition of virtual register '%") + + Twine(VReg.ID.Value) + "'"); + if (!VReg.PreferredRegister.Value.empty()) { + unsigned PreferredReg = 0; + if (parseNamedRegisterReference(PreferredReg, SM, MF, + VReg.PreferredRegister.Value, PFS, + IRSlots, Error)) + return error(Error, VReg.PreferredRegister.SourceRange); + RegInfo.setSimpleHint(Reg, PreferredReg); + } } + + // Parse the liveins. + for (const auto &LiveIn : YamlMF.LiveIns) { + unsigned Reg = 0; + if (parseNamedRegisterReference(Reg, SM, MF, LiveIn.Register.Value, PFS, + IRSlots, Error)) + return error(Error, LiveIn.Register.SourceRange); + unsigned VReg = 0; + if (!LiveIn.VirtualRegister.Value.empty()) { + if (parseVirtualRegisterReference( + VReg, SM, MF, LiveIn.VirtualRegister.Value, PFS, IRSlots, Error)) + return error(Error, LiveIn.VirtualRegister.SourceRange); + } + RegInfo.addLiveIn(Reg, VReg); + } + + // Parse the callee saved register mask. 
+ BitVector CalleeSavedRegisterMask(RegInfo.getUsedPhysRegsMask().size()); + if (!YamlMF.CalleeSavedRegisters) + return false; + for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) { + unsigned Reg = 0; + if (parseNamedRegisterReference(Reg, SM, MF, RegSource.Value, PFS, IRSlots, + Error)) + return error(Error, RegSource.SourceRange); + CalleeSavedRegisterMask[Reg] = true; + } + RegInfo.setUsedPhysRegMask(CalleeSavedRegisterMask.flip()); return false; } -bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI, - const yaml::MachineFunction &YamlMF) { +void MIRParserImpl::inferRegisterInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF) { + if (YamlMF.CalleeSavedRegisters) + return; + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isRegMask()) + continue; + MF.getRegInfo().addPhysRegsUsedFromRegMask(MO.getRegMask()); + } + } + } +} + +bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF, + PerFunctionMIParsingState &PFS) { + MachineFrameInfo &MFI = *MF.getFrameInfo(); + const Function &F = *MF.getFunction(); const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo; MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken); MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken); @@ -383,7 +433,20 @@ bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI, MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment); MFI.setHasVAStart(YamlMFI.HasVAStart); MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc); + if (!YamlMFI.SavePoint.Value.empty()) { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(MBB, YamlMFI.SavePoint, MF, PFS)) + return true; + MFI.setSavePoint(MBB); + } + if (!YamlMFI.RestorePoint.Value.empty()) { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(MBB, YamlMFI.RestorePoint, MF, PFS)) + return true; + MFI.setRestorePoint(MBB); + } + std::vector<CalleeSavedInfo> CSIInfo; // Initialize the fixed frame objects. for (const auto &Object : YamlMF.FixedStackObjects) { int ObjectIdx; @@ -393,27 +456,190 @@ bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI, else ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset); MFI.setObjectAlignment(ObjectIdx, Object.Alignment); - // TODO: Store the mapping between fixed object IDs and object indices to - // parse fixed stack object references correctly. + if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value, + ObjectIdx)) + .second) + return error(Object.ID.SourceRange.Start, + Twine("redefinition of fixed stack object '%fixed-stack.") + + Twine(Object.ID.Value) + "'"); + if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister, + ObjectIdx)) + return true; } // Initialize the ordinary frame objects. 
for (const auto &Object : YamlMF.StackObjects) { int ObjectIdx; + const AllocaInst *Alloca = nullptr; + const yaml::StringValue &Name = Object.Name; + if (!Name.Value.empty()) { + Alloca = dyn_cast_or_null<AllocaInst>( + F.getValueSymbolTable().lookup(Name.Value)); + if (!Alloca) + return error(Name.SourceRange.Start, + "alloca instruction named '" + Name.Value + + "' isn't defined in the function '" + F.getName() + + "'"); + } if (Object.Type == yaml::MachineStackObject::VariableSized) - ObjectIdx = - MFI.CreateVariableSizedObject(Object.Alignment, /*Alloca=*/nullptr); + ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca); else ObjectIdx = MFI.CreateStackObject( Object.Size, Object.Alignment, - Object.Type == yaml::MachineStackObject::SpillSlot); + Object.Type == yaml::MachineStackObject::SpillSlot, Alloca); MFI.setObjectOffset(ObjectIdx, Object.Offset); - // TODO: Store the mapping between object IDs and object indices to parse - // stack object references correctly. + if (!PFS.StackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx)) + .second) + return error(Object.ID.SourceRange.Start, + Twine("redefinition of stack object '%stack.") + + Twine(Object.ID.Value) + "'"); + if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister, + ObjectIdx)) + return true; + if (Object.LocalOffset) + MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue()); + if (parseStackObjectsDebugInfo(MF, PFS, Object, ObjectIdx)) + return true; + } + MFI.setCalleeSavedInfo(CSIInfo); + if (!CSIInfo.empty()) + MFI.setCalleeSavedInfoValid(true); + + // Initialize the various stack object references after initializing the + // stack objects. + if (!YamlMFI.StackProtector.Value.empty()) { + SMDiagnostic Error; + int FI; + if (parseStackObjectReference(FI, SM, MF, YamlMFI.StackProtector.Value, PFS, + IRSlots, Error)) + return error(Error, YamlMFI.StackProtector.SourceRange); + MFI.setStackProtectorIndex(FI); + } + return false; +} + +bool MIRParserImpl::parseCalleeSavedRegister( + MachineFunction &MF, PerFunctionMIParsingState &PFS, + std::vector<CalleeSavedInfo> &CSIInfo, + const yaml::StringValue &RegisterSource, int FrameIdx) { + if (RegisterSource.Value.empty()) + return false; + unsigned Reg = 0; + SMDiagnostic Error; + if (parseNamedRegisterReference(Reg, SM, MF, RegisterSource.Value, PFS, + IRSlots, Error)) + return error(Error, RegisterSource.SourceRange); + CSIInfo.push_back(CalleeSavedInfo(Reg, FrameIdx)); + return false; +} + +/// Verify that given node is of a certain type. Return true on error. +template <typename T> +static bool typecheckMDNode(T *&Result, MDNode *Node, + const yaml::StringValue &Source, + StringRef TypeString, MIRParserImpl &Parser) { + if (!Node) + return false; + Result = dyn_cast<T>(Node); + if (!Result) + return Parser.error(Source.SourceRange.Start, + "expected a reference to a '" + TypeString + + "' metadata node"); + return false; +} + +bool MIRParserImpl::parseStackObjectsDebugInfo( + MachineFunction &MF, PerFunctionMIParsingState &PFS, + const yaml::MachineStackObject &Object, int FrameIdx) { + // Debug information can only be attached to stack objects; Fixed stack + // objects aren't supported. 
+ assert(FrameIdx >= 0 && "Expected a stack object frame index"); + MDNode *Var = nullptr, *Expr = nullptr, *Loc = nullptr; + if (parseMDNode(Var, Object.DebugVar, MF, PFS) || + parseMDNode(Expr, Object.DebugExpr, MF, PFS) || + parseMDNode(Loc, Object.DebugLoc, MF, PFS)) + return true; + if (!Var && !Expr && !Loc) + return false; + DILocalVariable *DIVar = nullptr; + DIExpression *DIExpr = nullptr; + DILocation *DILoc = nullptr; + if (typecheckMDNode(DIVar, Var, Object.DebugVar, "DILocalVariable", *this) || + typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) || + typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this)) + return true; + MF.getMMI().setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc); + return false; +} + +bool MIRParserImpl::parseMDNode(MDNode *&Node, const yaml::StringValue &Source, + MachineFunction &MF, + const PerFunctionMIParsingState &PFS) { + if (Source.Value.empty()) + return false; + SMDiagnostic Error; + if (llvm::parseMDNode(Node, SM, MF, Source.Value, PFS, IRSlots, Error)) + return error(Error, Source.SourceRange); + return false; +} + +bool MIRParserImpl::initializeConstantPool( + MachineConstantPool &ConstantPool, const yaml::MachineFunction &YamlMF, + const MachineFunction &MF, + DenseMap<unsigned, unsigned> &ConstantPoolSlots) { + const auto &M = *MF.getFunction()->getParent(); + SMDiagnostic Error; + for (const auto &YamlConstant : YamlMF.Constants) { + const Constant *Value = dyn_cast_or_null<Constant>( + parseConstantValue(YamlConstant.Value.Value, Error, M)); + if (!Value) + return error(Error, YamlConstant.Value.SourceRange); + unsigned Alignment = + YamlConstant.Alignment + ? YamlConstant.Alignment + : M.getDataLayout().getPrefTypeAlignment(Value->getType()); + unsigned Index = ConstantPool.getConstantPoolIndex(Value, Alignment); + if (!ConstantPoolSlots.insert(std::make_pair(YamlConstant.ID.Value, Index)) + .second) + return error(YamlConstant.ID.SourceRange.Start, + Twine("redefinition of constant pool item '%const.") + + Twine(YamlConstant.ID.Value) + "'"); } return false; } +bool MIRParserImpl::initializeJumpTableInfo( + MachineFunction &MF, const yaml::MachineJumpTable &YamlJTI, + PerFunctionMIParsingState &PFS) { + MachineJumpTableInfo *JTI = MF.getOrCreateJumpTableInfo(YamlJTI.Kind); + for (const auto &Entry : YamlJTI.Entries) { + std::vector<MachineBasicBlock *> Blocks; + for (const auto &MBBSource : Entry.Blocks) { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(MBB, MBBSource.Value, MF, PFS)) + return true; + Blocks.push_back(MBB); + } + unsigned Index = JTI->createJumpTableIndex(Blocks); + if (!PFS.JumpTableSlots.insert(std::make_pair(Entry.ID.Value, Index)) + .second) + return error(Entry.ID.SourceRange.Start, + Twine("redefinition of jump table entry '%jump-table.") + + Twine(Entry.ID.Value) + "'"); + } + return false; +} + +bool MIRParserImpl::parseMBBReference(MachineBasicBlock *&MBB, + const yaml::StringValue &Source, + MachineFunction &MF, + const PerFunctionMIParsingState &PFS) { + SMDiagnostic Error; + if (llvm::parseMBBReference(MBB, SM, MF, Source.Value, PFS, IRSlots, Error)) + return error(Error, Source.SourceRange); + return false; +} + SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error, SMRange SourceRange) { assert(SourceRange.isValid() && "Invalid source range"); @@ -430,8 +656,8 @@ SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error, Error.getFixIts()); } -SMDiagnostic MIRParserImpl::diagFromLLVMAssemblyDiag(const SMDiagnostic 
&Error, - SMRange SourceRange) { +SMDiagnostic MIRParserImpl::diagFromBlockStringDiag(const SMDiagnostic &Error, + SMRange SourceRange) { assert(SourceRange.isValid()); // Translate the location of the error from the location in the llvm IR string diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp index d5cf9244199e..175cb0d51437 100644 --- a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp @@ -14,13 +14,20 @@ #include "MIRPrinter.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/YAMLTraits.h" @@ -31,11 +38,38 @@ using namespace llvm; namespace { +/// This structure describes how to print out stack object references. +struct FrameIndexOperand { + std::string Name; + unsigned ID; + bool IsFixed; + + FrameIndexOperand(StringRef Name, unsigned ID, bool IsFixed) + : Name(Name.str()), ID(ID), IsFixed(IsFixed) {} + + /// Return an ordinary stack object reference. + static FrameIndexOperand create(StringRef Name, unsigned ID) { + return FrameIndexOperand(Name, ID, /*IsFixed=*/false); + } + + /// Return a fixed stack object reference. + static FrameIndexOperand createFixed(unsigned ID) { + return FrameIndexOperand("", ID, /*IsFixed=*/true); + } +}; + +} // end anonymous namespace + +namespace llvm { + /// This class prints out the machine functions using the MIR serialization /// format. class MIRPrinter { raw_ostream &OS; DenseMap<const uint32_t *, unsigned> RegisterMaskIds; + /// Maps from stack object indices to operand indices which will be used when + /// printing frame index machine operands. 
+ DenseMap<int, FrameIndexOperand> StackObjectOperandMapping; public: MIRPrinter(raw_ostream &OS) : OS(OS) {} @@ -44,11 +78,16 @@ public: void convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo, const TargetRegisterInfo *TRI); - void convert(yaml::MachineFrameInfo &YamlMFI, const MachineFrameInfo &MFI); - void convert(ModuleSlotTracker &MST, yaml::MachineBasicBlock &YamlMBB, - const MachineBasicBlock &MBB); + void convert(ModuleSlotTracker &MST, yaml::MachineFrameInfo &YamlMFI, + const MachineFrameInfo &MFI); + void convert(yaml::MachineFunction &MF, + const MachineConstantPool &ConstantPool); + void convert(ModuleSlotTracker &MST, yaml::MachineJumpTable &YamlJTI, + const MachineJumpTableInfo &JTI); void convertStackObjects(yaml::MachineFunction &MF, - const MachineFrameInfo &MFI); + const MachineFrameInfo &MFI, MachineModuleInfo &MMI, + ModuleSlotTracker &MST, + const TargetRegisterInfo *TRI); private: void initRegisterMaskIds(const MachineFunction &MF); @@ -60,18 +99,32 @@ class MIPrinter { raw_ostream &OS; ModuleSlotTracker &MST; const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds; + const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping; public: MIPrinter(raw_ostream &OS, ModuleSlotTracker &MST, - const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds) - : OS(OS), MST(MST), RegisterMaskIds(RegisterMaskIds) {} + const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds, + const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping) + : OS(OS), MST(MST), RegisterMaskIds(RegisterMaskIds), + StackObjectOperandMapping(StackObjectOperandMapping) {} + + void print(const MachineBasicBlock &MBB); void print(const MachineInstr &MI); void printMBBReference(const MachineBasicBlock &MBB); - void print(const MachineOperand &Op, const TargetRegisterInfo *TRI); + void printIRBlockReference(const BasicBlock &BB); + void printIRValueReference(const Value &V); + void printStackObjectReference(int FrameIndex); + void printOffset(int64_t Offset); + void printTargetFlags(const MachineOperand &Op); + void print(const MachineOperand &Op, const TargetRegisterInfo *TRI, + unsigned I, bool ShouldPrintRegisterTies, bool IsDef = false); + void print(const MachineMemOperand &Op); + + void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI); }; -} // end anonymous namespace +} // end namespace llvm namespace llvm { namespace yaml { @@ -103,6 +156,12 @@ static void printReg(unsigned Reg, raw_ostream &OS, llvm_unreachable("Can't print this kind of register yet"); } +static void printReg(unsigned Reg, yaml::StringValue &Dest, + const TargetRegisterInfo *TRI) { + raw_string_ostream OS(Dest.Value); + printReg(Reg, OS, TRI); +} + void MIRPrinter::print(const MachineFunction &MF) { initRegisterMaskIds(MF); @@ -112,23 +171,25 @@ void MIRPrinter::print(const MachineFunction &MF) { YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); YamlMF.HasInlineAsm = MF.hasInlineAsm(); convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo()); - convert(YamlMF.FrameInfo, *MF.getFrameInfo()); - convertStackObjects(YamlMF, *MF.getFrameInfo()); - - int I = 0; ModuleSlotTracker MST(MF.getFunction()->getParent()); + MST.incorporateFunction(*MF.getFunction()); + convert(MST, YamlMF.FrameInfo, *MF.getFrameInfo()); + convertStackObjects(YamlMF, *MF.getFrameInfo(), MF.getMMI(), MST, + MF.getSubtarget().getRegisterInfo()); + if (const auto *ConstantPool = MF.getConstantPool()) + convert(YamlMF, *ConstantPool); + if (const auto *JumpTableInfo = MF.getJumpTableInfo()) + 
convert(MST, YamlMF.JumpTableInfo, *JumpTableInfo); + raw_string_ostream StrOS(YamlMF.Body.Value.Value); + bool IsNewlineNeeded = false; for (const auto &MBB : MF) { - // TODO: Allow printing of non sequentially numbered MBBs. - // This is currently needed as the basic block references get their index - // from MBB.getNumber(), thus it should be sequential so that the parser can - // map back to the correct MBBs when parsing the output. - assert(MBB.getNumber() == I++ && - "Can't print MBBs that aren't sequentially numbered"); - (void)I; - yaml::MachineBasicBlock YamlMBB; - convert(MST, YamlMBB, MBB); - YamlMF.BasicBlocks.push_back(YamlMBB); + if (IsNewlineNeeded) + StrOS << "\n"; + MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) + .print(MBB); + IsNewlineNeeded = true; } + StrOS.flush(); yaml::Output Out(OS); Out << YamlMF; } @@ -147,11 +208,38 @@ void MIRPrinter::convert(yaml::MachineFunction &MF, VReg.ID = I; VReg.Class = StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower(); + unsigned PreferredReg = RegInfo.getSimpleHint(Reg); + if (PreferredReg) + printReg(PreferredReg, VReg.PreferredRegister, TRI); MF.VirtualRegisters.push_back(VReg); } + + // Print the live ins. + for (auto I = RegInfo.livein_begin(), E = RegInfo.livein_end(); I != E; ++I) { + yaml::MachineFunctionLiveIn LiveIn; + printReg(I->first, LiveIn.Register, TRI); + if (I->second) + printReg(I->second, LiveIn.VirtualRegister, TRI); + MF.LiveIns.push_back(LiveIn); + } + // The used physical register mask is printed as an inverted callee saved + // register mask. + const BitVector &UsedPhysRegMask = RegInfo.getUsedPhysRegsMask(); + if (UsedPhysRegMask.none()) + return; + std::vector<yaml::FlowStringValue> CalleeSavedRegisters; + for (unsigned I = 0, E = UsedPhysRegMask.size(); I != E; ++I) { + if (!UsedPhysRegMask[I]) { + yaml::FlowStringValue Reg; + printReg(I, Reg, TRI); + CalleeSavedRegisters.push_back(Reg); + } + } + MF.CalleeSavedRegisters = CalleeSavedRegisters; } -void MIRPrinter::convert(yaml::MachineFrameInfo &YamlMFI, +void MIRPrinter::convert(ModuleSlotTracker &MST, + yaml::MachineFrameInfo &YamlMFI, const MachineFrameInfo &MFI) { YamlMFI.IsFrameAddressTaken = MFI.isFrameAddressTaken(); YamlMFI.IsReturnAddressTaken = MFI.isReturnAddressTaken(); @@ -166,10 +254,23 @@ void MIRPrinter::convert(yaml::MachineFrameInfo &YamlMFI, YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment(); YamlMFI.HasVAStart = MFI.hasVAStart(); YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc(); + if (MFI.getSavePoint()) { + raw_string_ostream StrOS(YamlMFI.SavePoint.Value); + MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) + .printMBBReference(*MFI.getSavePoint()); + } + if (MFI.getRestorePoint()) { + raw_string_ostream StrOS(YamlMFI.RestorePoint.Value); + MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) + .printMBBReference(*MFI.getRestorePoint()); + } } void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, - const MachineFrameInfo &MFI) { + const MachineFrameInfo &MFI, + MachineModuleInfo &MMI, + ModuleSlotTracker &MST, + const TargetRegisterInfo *TRI) { // Process fixed stack objects. unsigned ID = 0; for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) { @@ -177,7 +278,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, continue; yaml::FixedMachineStackObject YamlObject; - YamlObject.ID = ID++; + YamlObject.ID = ID; YamlObject.Type = MFI.isSpillSlotObjectIndex(I) ? 
yaml::FixedMachineStackObject::SpillSlot : yaml::FixedMachineStackObject::DefaultType; @@ -187,8 +288,8 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I); YamlObject.IsAliased = MFI.isAliasedObjectIndex(I); MF.FixedStackObjects.push_back(YamlObject); - // TODO: Store the mapping between fixed object IDs and object indices to - // print the fixed stack object references correctly. + StackObjectOperandMapping.insert( + std::make_pair(I, FrameIndexOperand::createFixed(ID++))); } // Process ordinary stack objects. @@ -198,7 +299,10 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, continue; yaml::MachineStackObject YamlObject; - YamlObject.ID = ID++; + YamlObject.ID = ID; + if (const auto *Alloca = MFI.getObjectAllocation(I)) + YamlObject.Name.Value = + Alloca->hasName() ? Alloca->getName() : "<unnamed alloca>"; YamlObject.Type = MFI.isSpillSlotObjectIndex(I) ? yaml::MachineStackObject::SpillSlot : MFI.isVariableSizedObjectIndex(I) @@ -209,47 +313,100 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, YamlObject.Alignment = MFI.getObjectAlignment(I); MF.StackObjects.push_back(YamlObject); - // TODO: Store the mapping between object IDs and object indices to print - // the stack object references correctly. + StackObjectOperandMapping.insert(std::make_pair( + I, FrameIndexOperand::create(YamlObject.Name.Value, ID++))); + } + + for (const auto &CSInfo : MFI.getCalleeSavedInfo()) { + yaml::StringValue Reg; + printReg(CSInfo.getReg(), Reg, TRI); + auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx()); + assert(StackObjectInfo != StackObjectOperandMapping.end() && + "Invalid stack object index"); + const FrameIndexOperand &StackObject = StackObjectInfo->second; + if (StackObject.IsFixed) + MF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg; + else + MF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg; + } + for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) { + auto LocalObject = MFI.getLocalFrameObjectMap(I); + auto StackObjectInfo = StackObjectOperandMapping.find(LocalObject.first); + assert(StackObjectInfo != StackObjectOperandMapping.end() && + "Invalid stack object index"); + const FrameIndexOperand &StackObject = StackObjectInfo->second; + assert(!StackObject.IsFixed && "Expected a locally mapped stack object"); + MF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second; + } + + // Print the stack object references in the frame information class after + // converting the stack objects. + if (MFI.hasStackProtectorIndex()) { + raw_string_ostream StrOS(MF.FrameInfo.StackProtector.Value); + MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) + .printStackObjectReference(MFI.getStackProtectorIndex()); + } + + // Print the debug variable information. 
+ for (MachineModuleInfo::VariableDbgInfo &DebugVar : + MMI.getVariableDbgInfo()) { + auto StackObjectInfo = StackObjectOperandMapping.find(DebugVar.Slot); + assert(StackObjectInfo != StackObjectOperandMapping.end() && + "Invalid stack object index"); + const FrameIndexOperand &StackObject = StackObjectInfo->second; + assert(!StackObject.IsFixed && "Expected a non-fixed stack object"); + auto &Object = MF.StackObjects[StackObject.ID]; + { + raw_string_ostream StrOS(Object.DebugVar.Value); + DebugVar.Var->printAsOperand(StrOS, MST); + } + { + raw_string_ostream StrOS(Object.DebugExpr.Value); + DebugVar.Expr->printAsOperand(StrOS, MST); + } + { + raw_string_ostream StrOS(Object.DebugLoc.Value); + DebugVar.Loc->printAsOperand(StrOS, MST); + } } } -void MIRPrinter::convert(ModuleSlotTracker &MST, - yaml::MachineBasicBlock &YamlMBB, - const MachineBasicBlock &MBB) { - assert(MBB.getNumber() >= 0 && "Invalid MBB number"); - YamlMBB.ID = (unsigned)MBB.getNumber(); - // TODO: Serialize unnamed BB references. - if (const auto *BB = MBB.getBasicBlock()) - YamlMBB.Name.Value = BB->hasName() ? BB->getName() : "<unnamed bb>"; - else - YamlMBB.Name.Value = ""; - YamlMBB.Alignment = MBB.getAlignment(); - YamlMBB.AddressTaken = MBB.hasAddressTaken(); - YamlMBB.IsLandingPad = MBB.isLandingPad(); - for (const auto *SuccMBB : MBB.successors()) { +void MIRPrinter::convert(yaml::MachineFunction &MF, + const MachineConstantPool &ConstantPool) { + unsigned ID = 0; + for (const MachineConstantPoolEntry &Constant : ConstantPool.getConstants()) { + // TODO: Serialize target specific constant pool entries. + if (Constant.isMachineConstantPoolEntry()) + llvm_unreachable("Can't print target specific constant pool entries yet"); + + yaml::MachineConstantPoolValue YamlConstant; std::string Str; raw_string_ostream StrOS(Str); - MIPrinter(StrOS, MST, RegisterMaskIds).printMBBReference(*SuccMBB); - YamlMBB.Successors.push_back(StrOS.str()); + Constant.Val.ConstVal->printAsOperand(StrOS); + YamlConstant.ID = ID++; + YamlConstant.Value = StrOS.str(); + YamlConstant.Alignment = Constant.getAlignment(); + MF.Constants.push_back(YamlConstant); } - // Print the live in registers. - const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); - assert(TRI && "Expected target register info"); - for (auto I = MBB.livein_begin(), E = MBB.livein_end(); I != E; ++I) { +} + +void MIRPrinter::convert(ModuleSlotTracker &MST, + yaml::MachineJumpTable &YamlJTI, + const MachineJumpTableInfo &JTI) { + YamlJTI.Kind = JTI.getEntryKind(); + unsigned ID = 0; + for (const auto &Table : JTI.getJumpTables()) { std::string Str; - raw_string_ostream StrOS(Str); - printReg(*I, StrOS, TRI); - YamlMBB.LiveIns.push_back(StrOS.str()); - } - // Print the machine instructions. 
- YamlMBB.Instructions.reserve(MBB.size()); - std::string Str; - for (const auto &MI : MBB) { - raw_string_ostream StrOS(Str); - MIPrinter(StrOS, MST, RegisterMaskIds).print(MI); - YamlMBB.Instructions.push_back(StrOS.str()); - Str.clear(); + yaml::MachineJumpTable::Entry Entry; + Entry.ID = ID++; + for (const auto *MBB : Table.MBBs) { + raw_string_ostream StrOS(Str); + MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) + .printMBBReference(*MBB); + Entry.Blocks.push_back(StrOS.str()); + Str.clear(); + } + YamlJTI.Entries.push_back(Entry); } } @@ -260,26 +417,137 @@ void MIRPrinter::initRegisterMaskIds(const MachineFunction &MF) { RegisterMaskIds.insert(std::make_pair(Mask, I++)); } +void MIPrinter::print(const MachineBasicBlock &MBB) { + assert(MBB.getNumber() >= 0 && "Invalid MBB number"); + OS << "bb." << MBB.getNumber(); + bool HasAttributes = false; + if (const auto *BB = MBB.getBasicBlock()) { + if (BB->hasName()) { + OS << "." << BB->getName(); + } else { + HasAttributes = true; + OS << " ("; + int Slot = MST.getLocalSlot(BB); + if (Slot == -1) + OS << "<ir-block badref>"; + else + OS << (Twine("%ir-block.") + Twine(Slot)).str(); + } + } + if (MBB.hasAddressTaken()) { + OS << (HasAttributes ? ", " : " ("); + OS << "address-taken"; + HasAttributes = true; + } + if (MBB.isEHPad()) { + OS << (HasAttributes ? ", " : " ("); + OS << "landing-pad"; + HasAttributes = true; + } + if (MBB.getAlignment()) { + OS << (HasAttributes ? ", " : " ("); + OS << "align " << MBB.getAlignment(); + HasAttributes = true; + } + if (HasAttributes) + OS << ")"; + OS << ":\n"; + + bool HasLineAttributes = false; + // Print the successors + if (!MBB.succ_empty()) { + OS.indent(2) << "successors: "; + for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) { + if (I != MBB.succ_begin()) + OS << ", "; + printMBBReference(**I); + if (MBB.hasSuccessorProbabilities()) + OS << '(' << MBB.getSuccProbability(I) << ')'; + } + OS << "\n"; + HasLineAttributes = true; + } + + // Print the live in registers. + const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); + assert(TRI && "Expected target register info"); + if (!MBB.livein_empty()) { + OS.indent(2) << "liveins: "; + bool First = true; + for (const auto &LI : MBB.liveins()) { + if (!First) + OS << ", "; + First = false; + printReg(LI.PhysReg, OS, TRI); + if (LI.LaneMask != ~0u) + OS << ':' << PrintLaneMask(LI.LaneMask); + } + OS << "\n"; + HasLineAttributes = true; + } + + if (HasLineAttributes) + OS << "\n"; + bool IsInBundle = false; + for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; ++I) { + const MachineInstr &MI = *I; + if (IsInBundle && !MI.isInsideBundle()) { + OS.indent(2) << "}\n"; + IsInBundle = false; + } + OS.indent(IsInBundle ? 4 : 2); + print(MI); + if (!IsInBundle && MI.getFlag(MachineInstr::BundledSucc)) { + OS << " {"; + IsInBundle = true; + } + OS << "\n"; + } + if (IsInBundle) + OS.indent(2) << "}\n"; +} + +/// Return true when an instruction has tied register that can't be determined +/// by the instruction's descriptor. +static bool hasComplexRegisterTies(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { + const auto &Operand = MI.getOperand(I); + if (!Operand.isReg() || Operand.isDef()) + // Ignore the defined registers as MCID marks only the uses as tied. + continue; + int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO); + int TiedIdx = Operand.isTied() ? 
int(MI.findTiedOperandIdx(I)) : -1; + if (ExpectedTiedIdx != TiedIdx) + return true; + } + return false; +} + void MIPrinter::print(const MachineInstr &MI) { const auto &SubTarget = MI.getParent()->getParent()->getSubtarget(); const auto *TRI = SubTarget.getRegisterInfo(); assert(TRI && "Expected target register info"); const auto *TII = SubTarget.getInstrInfo(); assert(TII && "Expected target instruction info"); + if (MI.isCFIInstruction()) + assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction"); + bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI); unsigned I = 0, E = MI.getNumOperands(); for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() && !MI.getOperand(I).isImplicit(); ++I) { if (I) OS << ", "; - print(MI.getOperand(I), TRI); + print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, /*IsDef=*/true); } if (I) OS << " = "; + if (MI.getFlag(MachineInstr::FrameSetup)) + OS << "frame-setup "; OS << TII->getName(MI.getOpcode()); - // TODO: Print the instruction flags, machine mem operands. if (I < E) OS << ' '; @@ -287,9 +555,27 @@ void MIPrinter::print(const MachineInstr &MI) { for (; I < E; ++I) { if (NeedComma) OS << ", "; - print(MI.getOperand(I), TRI); + print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies); NeedComma = true; } + + if (MI.getDebugLoc()) { + if (NeedComma) + OS << ','; + OS << " debug-location "; + MI.getDebugLoc()->printAsOperand(OS, MST); + } + + if (!MI.memoperands_empty()) { + OS << " :: "; + bool NeedComma = false; + for (const auto *Op : MI.memoperands()) { + if (NeedComma) + OS << ", "; + print(*Op); + NeedComma = true; + } + } } void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) { @@ -300,32 +586,225 @@ void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) { } } -void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) { +static void printIRSlotNumber(raw_ostream &OS, int Slot) { + if (Slot == -1) + OS << "<badref>"; + else + OS << Slot; +} + +void MIPrinter::printIRBlockReference(const BasicBlock &BB) { + OS << "%ir-block."; + if (BB.hasName()) { + printLLVMNameWithoutPrefix(OS, BB.getName()); + return; + } + const Function *F = BB.getParent(); + int Slot; + if (F == MST.getCurrentFunction()) { + Slot = MST.getLocalSlot(&BB); + } else { + ModuleSlotTracker CustomMST(F->getParent(), + /*ShouldInitializeAllMetadata=*/false); + CustomMST.incorporateFunction(*F); + Slot = CustomMST.getLocalSlot(&BB); + } + printIRSlotNumber(OS, Slot); +} + +void MIPrinter::printIRValueReference(const Value &V) { + if (isa<GlobalValue>(V)) { + V.printAsOperand(OS, /*PrintType=*/false, MST); + return; + } + if (isa<Constant>(V)) { + // Machine memory operands can load/store to/from constant value pointers. + OS << '`'; + V.printAsOperand(OS, /*PrintType=*/true, MST); + OS << '`'; + return; + } + OS << "%ir."; + if (V.hasName()) { + printLLVMNameWithoutPrefix(OS, V.getName()); + return; + } + printIRSlotNumber(OS, MST.getLocalSlot(&V)); +} + +void MIPrinter::printStackObjectReference(int FrameIndex) { + auto ObjectInfo = StackObjectOperandMapping.find(FrameIndex); + assert(ObjectInfo != StackObjectOperandMapping.end() && + "Invalid frame index"); + const FrameIndexOperand &Operand = ObjectInfo->second; + if (Operand.IsFixed) { + OS << "%fixed-stack." << Operand.ID; + return; + } + OS << "%stack." << Operand.ID; + if (!Operand.Name.empty()) + OS << '.' 
<< Operand.Name; +} + +void MIPrinter::printOffset(int64_t Offset) { + if (Offset == 0) + return; + if (Offset < 0) { + OS << " - " << -Offset; + return; + } + OS << " + " << Offset; +} + +static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) { + auto Flags = TII->getSerializableDirectMachineOperandTargetFlags(); + for (const auto &I : Flags) { + if (I.first == TF) { + return I.second; + } + } + return nullptr; +} + +void MIPrinter::printTargetFlags(const MachineOperand &Op) { + if (!Op.getTargetFlags()) + return; + const auto *TII = + Op.getParent()->getParent()->getParent()->getSubtarget().getInstrInfo(); + assert(TII && "expected instruction info"); + auto Flags = TII->decomposeMachineOperandsTargetFlags(Op.getTargetFlags()); + OS << "target-flags("; + const bool HasDirectFlags = Flags.first; + const bool HasBitmaskFlags = Flags.second; + if (!HasDirectFlags && !HasBitmaskFlags) { + OS << "<unknown>) "; + return; + } + if (HasDirectFlags) { + if (const auto *Name = getTargetFlagName(TII, Flags.first)) + OS << Name; + else + OS << "<unknown target flag>"; + } + if (!HasBitmaskFlags) { + OS << ") "; + return; + } + bool IsCommaNeeded = HasDirectFlags; + unsigned BitMask = Flags.second; + auto BitMasks = TII->getSerializableBitmaskMachineOperandTargetFlags(); + for (const auto &Mask : BitMasks) { + // Check if the flag's bitmask has the bits of the current mask set. + if ((BitMask & Mask.first) == Mask.first) { + if (IsCommaNeeded) + OS << ", "; + IsCommaNeeded = true; + OS << Mask.second; + // Clear the bits which were serialized from the flag's bitmask. + BitMask &= ~(Mask.first); + } + } + if (BitMask) { + // When the resulting flag's bitmask isn't zero, we know that we didn't + // serialize all of the bit flags. + if (IsCommaNeeded) + OS << ", "; + OS << "<unknown bitmask target flag>"; + } + OS << ") "; +} + +static const char *getTargetIndexName(const MachineFunction &MF, int Index) { + const auto *TII = MF.getSubtarget().getInstrInfo(); + assert(TII && "expected instruction info"); + auto Indices = TII->getSerializableTargetIndices(); + for (const auto &I : Indices) { + if (I.first == Index) { + return I.second; + } + } + return nullptr; +} + +void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, + unsigned I, bool ShouldPrintRegisterTies, bool IsDef) { + printTargetFlags(Op); switch (Op.getType()) { case MachineOperand::MO_Register: - // TODO: Print the other register flags. if (Op.isImplicit()) OS << (Op.isDef() ? "implicit-def " : "implicit "); + else if (!IsDef && Op.isDef()) + // Print the 'def' flag only when the operand is defined after '='. + OS << "def "; + if (Op.isInternalRead()) + OS << "internal "; if (Op.isDead()) OS << "dead "; if (Op.isKill()) OS << "killed "; if (Op.isUndef()) OS << "undef "; + if (Op.isEarlyClobber()) + OS << "early-clobber "; + if (Op.isDebug()) + OS << "debug-use "; printReg(Op.getReg(), OS, TRI); // Print the sub register. 
if (Op.getSubReg() != 0) OS << ':' << TRI->getSubRegIndexName(Op.getSubReg()); + if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef()) + OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")"; break; case MachineOperand::MO_Immediate: OS << Op.getImm(); break; + case MachineOperand::MO_CImmediate: + Op.getCImm()->printAsOperand(OS, /*PrintType=*/true, MST); + break; + case MachineOperand::MO_FPImmediate: + Op.getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST); + break; case MachineOperand::MO_MachineBasicBlock: printMBBReference(*Op.getMBB()); break; + case MachineOperand::MO_FrameIndex: + printStackObjectReference(Op.getIndex()); + break; + case MachineOperand::MO_ConstantPoolIndex: + OS << "%const." << Op.getIndex(); + printOffset(Op.getOffset()); + break; + case MachineOperand::MO_TargetIndex: { + OS << "target-index("; + if (const auto *Name = getTargetIndexName( + *Op.getParent()->getParent()->getParent(), Op.getIndex())) + OS << Name; + else + OS << "<unknown>"; + OS << ')'; + printOffset(Op.getOffset()); + break; + } + case MachineOperand::MO_JumpTableIndex: + OS << "%jump-table." << Op.getIndex(); + break; + case MachineOperand::MO_ExternalSymbol: + OS << '$'; + printLLVMNameWithoutPrefix(OS, Op.getSymbolName()); + printOffset(Op.getOffset()); + break; case MachineOperand::MO_GlobalAddress: Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST); - // TODO: Print offset and target flags. + printOffset(Op.getOffset()); + break; + case MachineOperand::MO_BlockAddress: + OS << "blockaddress("; + Op.getBlockAddress()->getFunction()->printAsOperand(OS, /*PrintType=*/false, + MST); + OS << ", "; + printIRBlockReference(*Op.getBlockAddress()->getBasicBlock()); + OS << ')'; + printOffset(Op.getOffset()); break; case MachineOperand::MO_RegisterMask: { auto RegMaskInfo = RegisterMaskIds.find(Op.getRegMask()); @@ -335,9 +814,157 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) { llvm_unreachable("Can't print this machine register mask yet."); break; } + case MachineOperand::MO_RegisterLiveOut: { + const uint32_t *RegMask = Op.getRegLiveOut(); + OS << "liveout("; + bool IsCommaNeeded = false; + for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg < E; ++Reg) { + if (RegMask[Reg / 32] & (1U << (Reg % 32))) { + if (IsCommaNeeded) + OS << ", "; + printReg(Reg, OS, TRI); + IsCommaNeeded = true; + } + } + OS << ")"; + break; + } + case MachineOperand::MO_Metadata: + Op.getMetadata()->printAsOperand(OS, MST); + break; + case MachineOperand::MO_MCSymbol: + OS << "<mcsymbol " << *Op.getMCSymbol() << ">"; + break; + case MachineOperand::MO_CFIIndex: { + const auto &MMI = Op.getParent()->getParent()->getParent()->getMMI(); + print(MMI.getFrameInstructions()[Op.getCFIIndex()], TRI); + break; + } + } +} + +void MIPrinter::print(const MachineMemOperand &Op) { + OS << '('; + // TODO: Print operand's target specific flags. + if (Op.isVolatile()) + OS << "volatile "; + if (Op.isNonTemporal()) + OS << "non-temporal "; + if (Op.isInvariant()) + OS << "invariant "; + if (Op.isLoad()) + OS << "load "; + else { + assert(Op.isStore() && "Non load machine operand must be a store"); + OS << "store "; + } + OS << Op.getSize() << (Op.isLoad() ? 
" from " : " into "); + if (const Value *Val = Op.getValue()) { + printIRValueReference(*Val); + } else { + const PseudoSourceValue *PVal = Op.getPseudoValue(); + assert(PVal && "Expected a pseudo source value"); + switch (PVal->kind()) { + case PseudoSourceValue::Stack: + OS << "stack"; + break; + case PseudoSourceValue::GOT: + OS << "got"; + break; + case PseudoSourceValue::JumpTable: + OS << "jump-table"; + break; + case PseudoSourceValue::ConstantPool: + OS << "constant-pool"; + break; + case PseudoSourceValue::FixedStack: + printStackObjectReference( + cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex()); + break; + case PseudoSourceValue::GlobalValueCallEntry: + OS << "call-entry "; + cast<GlobalValuePseudoSourceValue>(PVal)->getValue()->printAsOperand( + OS, /*PrintType=*/false, MST); + break; + case PseudoSourceValue::ExternalSymbolCallEntry: + OS << "call-entry $"; + printLLVMNameWithoutPrefix( + OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol()); + break; + } + } + printOffset(Op.getOffset()); + if (Op.getBaseAlignment() != Op.getSize()) + OS << ", align " << Op.getBaseAlignment(); + auto AAInfo = Op.getAAInfo(); + if (AAInfo.TBAA) { + OS << ", !tbaa "; + AAInfo.TBAA->printAsOperand(OS, MST); + } + if (AAInfo.Scope) { + OS << ", !alias.scope "; + AAInfo.Scope->printAsOperand(OS, MST); + } + if (AAInfo.NoAlias) { + OS << ", !noalias "; + AAInfo.NoAlias->printAsOperand(OS, MST); + } + if (Op.getRanges()) { + OS << ", !range "; + Op.getRanges()->printAsOperand(OS, MST); + } + OS << ')'; +} + +static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS, + const TargetRegisterInfo *TRI) { + int Reg = TRI->getLLVMRegNum(DwarfReg, true); + if (Reg == -1) { + OS << "<badreg>"; + return; + } + printReg(Reg, OS, TRI); +} + +void MIPrinter::print(const MCCFIInstruction &CFI, + const TargetRegisterInfo *TRI) { + switch (CFI.getOperation()) { + case MCCFIInstruction::OpSameValue: + OS << ".cfi_same_value "; + if (CFI.getLabel()) + OS << "<mcsymbol> "; + printCFIRegister(CFI.getRegister(), OS, TRI); + break; + case MCCFIInstruction::OpOffset: + OS << ".cfi_offset "; + if (CFI.getLabel()) + OS << "<mcsymbol> "; + printCFIRegister(CFI.getRegister(), OS, TRI); + OS << ", " << CFI.getOffset(); + break; + case MCCFIInstruction::OpDefCfaRegister: + OS << ".cfi_def_cfa_register "; + if (CFI.getLabel()) + OS << "<mcsymbol> "; + printCFIRegister(CFI.getRegister(), OS, TRI); + break; + case MCCFIInstruction::OpDefCfaOffset: + OS << ".cfi_def_cfa_offset "; + if (CFI.getLabel()) + OS << "<mcsymbol> "; + OS << CFI.getOffset(); + break; + case MCCFIInstruction::OpDefCfa: + OS << ".cfi_def_cfa "; + if (CFI.getLabel()) + OS << "<mcsymbol> "; + printCFIRegister(CFI.getRegister(), OS, TRI); + OS << ", " << CFI.getOffset(); + break; default: - // TODO: Print the other machine operands. - llvm_unreachable("Can't print this machine operand at the moment"); + // TODO: Print the other CFI Operations. 
+ OS << "<unserializable cfi operation>"; + break; } } diff --git a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp index 13d61e65d7e0..8e7566a4e46b 100644 --- a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp +++ b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp @@ -40,7 +40,7 @@ struct MIRPrintingPass : public MachineFunctionPass { MachineFunctionPass::getAnalysisUsage(AU); } - virtual bool runOnMachineFunction(MachineFunction &MF) override { + bool runOnMachineFunction(MachineFunction &MF) override { std::string Str; raw_string_ostream StrOS(Str); printMIR(StrOS, MF); @@ -48,7 +48,7 @@ struct MIRPrintingPass : public MachineFunctionPass { return false; } - virtual bool doFinalization(Module &M) override { + bool doFinalization(Module &M) override { printMIR(OS, M); OS << MachineFunctions; return false; diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index 5d3f7ebaed29..76099f28499b 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -38,22 +39,21 @@ using namespace llvm; #define DEBUG_TYPE "codegen" -MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb) - : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false), - AddressTaken(false), CachedMCSymbol(nullptr) { +MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B) + : BB(B), Number(-1), xParent(&MF) { Insts.Parent = this; } MachineBasicBlock::~MachineBasicBlock() { } -/// getSymbol - Return the MCSymbol for this basic block. -/// +/// Return the MCSymbol for this basic block. MCSymbol *MachineBasicBlock::getSymbol() const { if (!CachedMCSymbol) { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); const char *Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); + assert(getNumber() >= 0 && "cannot get label for unreachable MBB"); CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber())); @@ -68,9 +68,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { return OS; } -/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the -/// parent pointer of the MBB, the MBB numbering, and any instructions in the -/// MBB to be on the right operand list for registers. +/// When an MBB is added to an MF, we need to update the parent pointer of the +/// MBB, the MBB numbering, and any instructions in the MBB to be on the right +/// operand list for registers. /// /// MBBs start out as #-1. When a MBB is added to a MachineFunction, it /// gets the next available unique MBB number. If it is removed from a @@ -91,10 +91,8 @@ void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) { N->Number = -1; } - -/// addNodeToList (MI) - When we add an instruction to a basic block -/// list, we update its parent pointer and add its operands from reg use/def -/// lists if appropriate. +/// When we add an instruction to a basic block list, we update its parent +/// pointer and add its operands from reg use/def lists if appropriate. 
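The target-flags serialization loop in the MIPrinter hunk above walks the target's table of (bit mask, name) pairs, prints each name whose bits are fully present, clears those bits, and falls back to a placeholder when unrecognized bits remain. The following is a minimal standalone sketch of that idea in plain C++; the flag values and names are made up for illustration and are not any target's real tables.

#include <cstdio>
#include <utility>
#include <vector>

// Illustrative only: mirrors the serialization loop in printTargetFlags, using
// made-up flag names instead of a real target's tables.
static void printBitmaskFlags(
    unsigned BitMask,
    const std::vector<std::pair<unsigned, const char *>> &Masks) {
  bool IsCommaNeeded = false;
  for (const auto &Mask : Masks) {
    // Emit the name when every bit of this mask is present, then clear them.
    if ((BitMask & Mask.first) == Mask.first) {
      if (IsCommaNeeded)
        std::printf(", ");
      IsCommaNeeded = true;
      std::printf("%s", Mask.second);
      BitMask &= ~Mask.first;
    }
  }
  // Any leftover bits were not covered by the table.
  if (BitMask) {
    if (IsCommaNeeded)
      std::printf(", ");
    std::printf("<unknown bitmask target flag>");
  }
  std::printf("\n");
}

int main() {
  // Prints: flag-a, flag-c, <unknown bitmask target flag>
  printBitmaskFlags(0x1 | 0x4 | 0x8,
                    {{0x1, "flag-a"}, {0x2, "flag-b"}, {0x4, "flag-c"}});
}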
void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { assert(!N->getParent() && "machine instruction already in a basic block"); N->setParent(Parent); @@ -105,9 +103,8 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { N->AddRegOperandsToUseLists(MF->getRegInfo()); } -/// removeNodeFromList (MI) - When we remove an instruction from a basic block -/// list, we update its parent pointer and remove its operands from reg use/def -/// lists if appropriate. +/// When we remove an instruction from a basic block list, we update its parent +/// pointer and remove its operands from reg use/def lists if appropriate. void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { assert(N->getParent() && "machine instruction not in a basic block"); @@ -118,23 +115,22 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { N->setParent(nullptr); } -/// transferNodesFromList (MI) - When moving a range of instructions from one -/// MBB list to another, we need to update the parent pointers and the use/def -/// lists. +/// When moving a range of instructions from one MBB list to another, we need to +/// update the parent pointers and the use/def lists. void ilist_traits<MachineInstr>:: -transferNodesFromList(ilist_traits<MachineInstr> &fromList, - ilist_iterator<MachineInstr> first, - ilist_iterator<MachineInstr> last) { - assert(Parent->getParent() == fromList.Parent->getParent() && +transferNodesFromList(ilist_traits<MachineInstr> &FromList, + ilist_iterator<MachineInstr> First, + ilist_iterator<MachineInstr> Last) { + assert(Parent->getParent() == FromList.Parent->getParent() && "MachineInstr parent mismatch!"); // Splice within the same MBB -> no change. - if (Parent == fromList.Parent) return; + if (Parent == FromList.Parent) return; // If splicing between two blocks within the same function, just update the // parent pointers. 
- for (; first != last; ++first) - first->setParent(Parent); + for (; First != Last; ++First) + First->setParent(Parent); } void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) { @@ -208,11 +204,18 @@ const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const { if (succ_size() > 2) return nullptr; for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I) - if ((*I)->isLandingPad()) + if ((*I)->isEHPad()) return *I; return nullptr; } +bool MachineBasicBlock::hasEHPadSuccessor() const { + for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I) + if ((*I)->isEHPad()) + return true; + return false; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineBasicBlock::dump() const { print(dbgs()); @@ -271,7 +274,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, LBB->printAsOperand(OS, /*PrintType=*/false, MST); Comma = ", "; } - if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; } + if (isEHPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; } if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; } if (Alignment) OS << Comma << "Align " << Alignment << " (" << (1u << Alignment) @@ -283,8 +286,11 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (!livein_empty()) { if (Indexes) OS << '\t'; OS << " Live Ins:"; - for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I) - OS << ' ' << PrintReg(*I, TRI); + for (const auto &LI : make_range(livein_begin(), livein_end())) { + OS << ' ' << PrintReg(LI.PhysReg, TRI); + if (LI.LaneMask != ~0u) + OS << ':' << PrintLaneMask(LI.LaneMask); + } OS << '\n'; } // Print the preds of this block according to the CFG. @@ -298,8 +304,8 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) { if (Indexes) { - if (Indexes->hasIndex(I)) - OS << Indexes->getInstructionIndex(I); + if (Indexes->hasIndex(&*I)) + OS << Indexes->getInstructionIndex(&*I); OS << '\t'; } OS << '\t'; @@ -314,35 +320,63 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << " Successors according to CFG:"; for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) { OS << " BB#" << (*SI)->getNumber(); - if (!Weights.empty()) - OS << '(' << *getWeightIterator(SI) << ')'; + if (!Probs.empty()) + OS << '(' << *getProbabilityIterator(SI) << ')'; } OS << '\n'; } } -void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) const { +void MachineBasicBlock::printAsOperand(raw_ostream &OS, + bool /*PrintType*/) const { OS << "BB#" << getNumber(); } -void MachineBasicBlock::removeLiveIn(unsigned Reg) { - std::vector<unsigned>::iterator I = - std::find(LiveIns.begin(), LiveIns.end(), Reg); - if (I != LiveIns.end()) +void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) { + LiveInVector::iterator I = std::find_if( + LiveIns.begin(), LiveIns.end(), + [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; }); + if (I == LiveIns.end()) + return; + + I->LaneMask &= ~LaneMask; + if (I->LaneMask == 0) LiveIns.erase(I); } -bool MachineBasicBlock::isLiveIn(unsigned Reg) const { - livein_iterator I = std::find(livein_begin(), livein_end(), Reg); - return I != livein_end(); +bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const { + livein_iterator I = std::find_if( + LiveIns.begin(), LiveIns.end(), + [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == 
Reg; }); + return I != livein_end() && (I->LaneMask & LaneMask) != 0; +} + +void MachineBasicBlock::sortUniqueLiveIns() { + std::sort(LiveIns.begin(), LiveIns.end(), + [](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) { + return LI0.PhysReg < LI1.PhysReg; + }); + // Liveins are sorted by physreg now we can merge their lanemasks. + LiveInVector::const_iterator I = LiveIns.begin(); + LiveInVector::const_iterator J; + LiveInVector::iterator Out = LiveIns.begin(); + for (; I != LiveIns.end(); ++Out, I = J) { + unsigned PhysReg = I->PhysReg; + LaneBitmask LaneMask = I->LaneMask; + for (J = std::next(I); J != LiveIns.end() && J->PhysReg == PhysReg; ++J) + LaneMask |= J->LaneMask; + Out->PhysReg = PhysReg; + Out->LaneMask = LaneMask; + } + LiveIns.erase(Out, LiveIns.end()); } unsigned -MachineBasicBlock::addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC) { +MachineBasicBlock::addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC) { assert(getParent() && "MBB must be inserted in function"); assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Expected physreg"); assert(RC && "Register class is required"); - assert((isLandingPad() || this == &getParent()->front()) && + assert((isEHPad() || this == &getParent()->front()) && "Only the entry block and landing pads can have physreg live ins"); bool LiveIn = isLiveIn(PhysReg); @@ -370,12 +404,11 @@ MachineBasicBlock::addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC) { } void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) { - getParent()->splice(NewAfter, this); + getParent()->splice(NewAfter->getIterator(), getIterator()); } void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) { - MachineFunction::iterator BBI = NewBefore; - getParent()->splice(++BBI, this); + getParent()->splice(++NewBefore->getIterator(), getIterator()); } void MachineBasicBlock::updateTerminator() { @@ -385,7 +418,7 @@ void MachineBasicBlock::updateTerminator() { MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; - DebugLoc dl; // FIXME: this is nowhere + DebugLoc DL; // FIXME: this is nowhere bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond); (void) B; assert(!B && "UpdateTerminators requires analyzable predecessors!"); @@ -400,7 +433,7 @@ void MachineBasicBlock::updateTerminator() { // its layout successor, insert a branch. First we have to locate the // only non-landing-pad successor, as that is the fallthrough block. for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { - if ((*SI)->isLandingPad()) + if ((*SI)->isEHPad()) continue; assert(!TBB && "Found more than one non-landing-pad successor!"); TBB = *SI; @@ -414,7 +447,7 @@ void MachineBasicBlock::updateTerminator() { // Finally update the unconditional successor to be reached via a branch // if it would not be reached by fallthrough. 
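sortUniqueLiveIns in the hunk above sorts the live-in list by physical register and then folds duplicate entries together by OR-ing their lane masks. Below is a self-contained sketch of the same sort-and-merge step, using a plain struct in place of LLVM's RegisterMaskPair and LaneBitmask types.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative only: models the sort-and-merge in sortUniqueLiveIns with a
// plain struct instead of MachineBasicBlock::RegisterMaskPair.
struct LiveInEntry {
  unsigned PhysReg;
  uint64_t LaneMask;
};

static void sortUniqueLiveIns(std::vector<LiveInEntry> &LiveIns) {
  std::sort(LiveIns.begin(), LiveIns.end(),
            [](const LiveInEntry &A, const LiveInEntry &B) {
              return A.PhysReg < B.PhysReg;
            });
  // Entries are now grouped by register; OR the lane masks of each group into
  // a single entry and drop the rest.
  auto Out = LiveIns.begin();
  for (auto I = LiveIns.begin(), E = LiveIns.end(); I != E; ++Out) {
    auto J = I;
    uint64_t Mask = 0;
    for (; J != E && J->PhysReg == I->PhysReg; ++J)
      Mask |= J->LaneMask;
    Out->PhysReg = I->PhysReg;
    Out->LaneMask = Mask;
    I = J;
  }
  LiveIns.erase(Out, LiveIns.end());
}

int main() {
  // Register 5 appears twice; its lane masks 0x1 and 0x2 merge into 0x3.
  std::vector<LiveInEntry> LiveIns = {{7, 0x1}, {5, 0x1}, {5, 0x2}};
  sortUniqueLiveIns(LiveIns);
  for (const auto &LI : LiveIns)
    std::printf("reg %u mask 0x%llx\n", LI.PhysReg,
                (unsigned long long)LI.LaneMask);
}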
if (!isLayoutSuccessor(TBB)) - TII->InsertBranch(*this, TBB, nullptr, Cond, dl); + TII->InsertBranch(*this, TBB, nullptr, Cond, DL); } } else { if (FBB) { @@ -425,10 +458,10 @@ void MachineBasicBlock::updateTerminator() { if (TII->ReverseBranchCondition(Cond)) return; TII->RemoveBranch(*this); - TII->InsertBranch(*this, FBB, nullptr, Cond, dl); + TII->InsertBranch(*this, FBB, nullptr, Cond, DL); } else if (isLayoutSuccessor(FBB)) { TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, nullptr, Cond, dl); + TII->InsertBranch(*this, TBB, nullptr, Cond, DL); } } else { // Walk through the successors and find the successor which is not @@ -436,7 +469,7 @@ void MachineBasicBlock::updateTerminator() { // as the fallthrough successor. MachineBasicBlock *FallthroughBB = nullptr; for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { - if ((*SI)->isLandingPad() || *SI == TBB) + if ((*SI)->isEHPad() || *SI == TBB) continue; assert(!FallthroughBB && "Found more than one fallthrough successor."); FallthroughBB = *SI; @@ -445,14 +478,14 @@ void MachineBasicBlock::updateTerminator() { // We fallthrough to the same basic block as the conditional jump // targets. Remove the conditional jump, leaving unconditional // fallthrough. - // FIXME: This does not seem like a reasonable pattern to support, but it - // has been seen in the wild coming out of degenerate ARM test cases. + // FIXME: This does not seem like a reasonable pattern to support, but + // it has been seen in the wild coming out of degenerate ARM test cases. TII->RemoveBranch(*this); // Finally update the unconditional successor to be reached via a branch // if it would not be reached by fallthrough. if (!isLayoutSuccessor(TBB)) - TII->InsertBranch(*this, TBB, nullptr, Cond, dl); + TII->InsertBranch(*this, TBB, nullptr, Cond, DL); return; } @@ -461,55 +494,69 @@ void MachineBasicBlock::updateTerminator() { if (TII->ReverseBranchCondition(Cond)) { // We can't reverse the condition, add an unconditional branch. Cond.clear(); - TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, dl); + TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL); return; } TII->RemoveBranch(*this); - TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, dl); + TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL); } else if (!isLayoutSuccessor(FallthroughBB)) { TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, FallthroughBB, Cond, dl); + TII->InsertBranch(*this, TBB, FallthroughBB, Cond, DL); } } } } -void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ, uint32_t weight) { - - // If we see non-zero value for the first time it means we actually use Weight - // list, so we fill all Weights with 0's. - if (weight != 0 && Weights.empty()) - Weights.resize(Successors.size()); - - if (weight != 0 || !Weights.empty()) - Weights.push_back(weight); - - Successors.push_back(succ); - succ->addPredecessor(this); - } +void MachineBasicBlock::validateSuccProbs() const { +#ifndef NDEBUG + int64_t Sum = 0; + for (auto Prob : Probs) + Sum += Prob.getNumerator(); + // Due to precision issue, we assume that the sum of probabilities is one if + // the difference between the sum of their numerators and the denominator is + // no greater than the number of successors. 
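The tolerance used by validateSuccProbs above allows the numerators to miss the full denominator by up to one unit per successor, since splitting a denominator across several edges can lose a unit to rounding for each of them. A small standalone illustration of that check, with raw numerators over an arbitrary denominator rather than LLVM's BranchProbability:

#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <vector>

// Illustrative only: the tolerance check from validateSuccProbs, with
// probabilities reduced to raw numerators over an assumed common denominator.
static void validateSuccProbs(const std::vector<uint32_t> &Numerators,
                              uint32_t Denominator) {
  int64_t Sum = 0;
  for (uint32_t N : Numerators)
    Sum += N;
  // Splitting the denominator across successors can lose a unit to rounding
  // per edge, so tolerate a difference of up to one unit per entry.
  assert((uint64_t)std::abs(Sum - (int64_t)Denominator) <= Numerators.size() &&
         "The sum of successors' probabilities exceeds one.");
  (void)Sum;
  (void)Denominator;
}

int main() {
  // Three successors of roughly one third each over a denominator of 1000000:
  // 333333 * 3 = 999999 is off by 1, within the allowed slack of 3.
  validateSuccProbs({333333, 333333, 333333}, 1000000);
}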
+ assert((uint64_t)std::abs(Sum - BranchProbability::getDenominator()) <= + Probs.size() && + "The sum of successors's probabilities exceeds one."); +#endif // NDEBUG +} -void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) { - succ->removePredecessor(this); - succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); - assert(I != Successors.end() && "Not a current successor!"); +void MachineBasicBlock::addSuccessor(MachineBasicBlock *Succ, + BranchProbability Prob) { + // Probability list is either empty (if successor list isn't empty, this means + // disabled optimization) or has the same size as successor list. + if (!(Probs.empty() && !Successors.empty())) + Probs.push_back(Prob); + Successors.push_back(Succ); + Succ->addPredecessor(this); +} - // If Weight list is empty it means we don't use it (disabled optimization). - if (!Weights.empty()) { - weight_iterator WI = getWeightIterator(I); - Weights.erase(WI); - } +void MachineBasicBlock::addSuccessorWithoutProb(MachineBasicBlock *Succ) { + // We need to make sure probability list is either empty or has the same size + // of successor list. When this function is called, we can safely delete all + // probability in the list. + Probs.clear(); + Successors.push_back(Succ); + Succ->addPredecessor(this); +} - Successors.erase(I); +void MachineBasicBlock::removeSuccessor(MachineBasicBlock *Succ, + bool NormalizeSuccProbs) { + succ_iterator I = std::find(Successors.begin(), Successors.end(), Succ); + removeSuccessor(I, NormalizeSuccProbs); } MachineBasicBlock::succ_iterator -MachineBasicBlock::removeSuccessor(succ_iterator I) { +MachineBasicBlock::removeSuccessor(succ_iterator I, bool NormalizeSuccProbs) { assert(I != Successors.end() && "Not a current successor!"); - // If Weight list is empty it means we don't use it (disabled optimization). - if (!Weights.empty()) { - weight_iterator WI = getWeightIterator(I); - Weights.erase(WI); + // If probability list is empty it means we don't use it (disabled + // optimization). + if (!Probs.empty()) { + probability_iterator WI = getProbabilityIterator(I); + Probs.erase(WI); + if (NormalizeSuccProbs) + normalizeSuccProbs(); } (*I)->removePredecessor(this); @@ -537,74 +584,77 @@ void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, } } assert(OldI != E && "Old is not a successor of this block"); - Old->removePredecessor(this); // If New isn't already a successor, let it take Old's place. if (NewI == E) { + Old->removePredecessor(this); New->addPredecessor(this); *OldI = New; return; } // New is already a successor. - // Update its weight instead of adding a duplicate edge. - if (!Weights.empty()) { - weight_iterator OldWI = getWeightIterator(OldI); - *getWeightIterator(NewI) += *OldWI; - Weights.erase(OldWI); + // Update its probability instead of adding a duplicate edge. 
+ if (!Probs.empty()) { + auto ProbIter = getProbabilityIterator(NewI); + if (!ProbIter->isUnknown()) + *ProbIter += *getProbabilityIterator(OldI); } - Successors.erase(OldI); + removeSuccessor(OldI); } -void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { - Predecessors.push_back(pred); +void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) { + Predecessors.push_back(Pred); } -void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { - pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), pred); +void MachineBasicBlock::removePredecessor(MachineBasicBlock *Pred) { + pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), Pred); assert(I != Predecessors.end() && "Pred is not a predecessor of this block!"); Predecessors.erase(I); } -void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { - if (this == fromMBB) +void MachineBasicBlock::transferSuccessors(MachineBasicBlock *FromMBB) { + if (this == FromMBB) return; - while (!fromMBB->succ_empty()) { - MachineBasicBlock *Succ = *fromMBB->succ_begin(); - uint32_t Weight = 0; + while (!FromMBB->succ_empty()) { + MachineBasicBlock *Succ = *FromMBB->succ_begin(); - // If Weight list is empty it means we don't use it (disabled optimization). - if (!fromMBB->Weights.empty()) - Weight = *fromMBB->Weights.begin(); + // If probability list is empty it means we don't use it (disabled optimization). + if (!FromMBB->Probs.empty()) { + auto Prob = *FromMBB->Probs.begin(); + addSuccessor(Succ, Prob); + } else + addSuccessorWithoutProb(Succ); - addSuccessor(Succ, Weight); - fromMBB->removeSuccessor(Succ); + FromMBB->removeSuccessor(Succ); } } void -MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { - if (this == fromMBB) +MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) { + if (this == FromMBB) return; - while (!fromMBB->succ_empty()) { - MachineBasicBlock *Succ = *fromMBB->succ_begin(); - uint32_t Weight = 0; - if (!fromMBB->Weights.empty()) - Weight = *fromMBB->Weights.begin(); - addSuccessor(Succ, Weight); - fromMBB->removeSuccessor(Succ); + while (!FromMBB->succ_empty()) { + MachineBasicBlock *Succ = *FromMBB->succ_begin(); + if (!FromMBB->Probs.empty()) { + auto Prob = *FromMBB->Probs.begin(); + addSuccessor(Succ, Prob); + } else + addSuccessorWithoutProb(Succ); + FromMBB->removeSuccessor(Succ); // Fix up any PHI nodes in the successor. for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(), ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI) for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) { MachineOperand &MO = MI->getOperand(i); - if (MO.getMBB() == fromMBB) + if (MO.getMBB() == FromMBB) MO.setMBB(this); } } + normalizeSuccProbs(); } bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const { @@ -621,14 +671,14 @@ bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { } bool MachineBasicBlock::canFallThrough() { - MachineFunction::iterator Fallthrough = this; + MachineFunction::iterator Fallthrough = getIterator(); ++Fallthrough; // If FallthroughBlock is off the end of the function, it can't fall through. if (Fallthrough == getParent()->end()) return false; // If FallthroughBlock isn't a successor, no fallthrough is possible. - if (!isSuccessor(Fallthrough)) + if (!isSuccessor(&*Fallthrough)) return false; // Analyze the branches, if any, at the end of the block. 
@@ -666,11 +716,11 @@ MachineBasicBlock * MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // Splitting the critical edge to a landing pad block is non-trivial. Don't do // it in this generic function. - if (Succ->isLandingPad()) + if (Succ->isEHPad()) return nullptr; MachineFunction *MF = getParent(); - DebugLoc dl; // FIXME: this is nowhere + DebugLoc DL; // FIXME: this is nowhere // Performance might be harmed on HW that implements branching using exec mask // where both sides of the branches are always executed. @@ -719,7 +769,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (LV) for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); I != E; ++I) { - MachineInstr *MI = I; + MachineInstr *MI = &*I; for (MachineInstr::mop_iterator OI = MI->operands_begin(), OE = MI->operands_end(); OI != OE; ++OI) { if (!OI->isReg() || OI->getReg() == 0 || @@ -739,7 +789,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (LIS) { for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); I != E; ++I) { - MachineInstr *MI = I; + MachineInstr *MI = &*I; for (MachineInstr::mop_iterator OI = MI->operands_begin(), OE = MI->operands_end(); OI != OE; ++OI) { @@ -761,7 +811,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (Indexes) { for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); I != E; ++I) - Terminators.push_back(I); + Terminators.push_back(&*I); } updateTerminator(); @@ -770,7 +820,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { SmallVector<MachineInstr*, 4> NewTerminators; for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); I != E; ++I) - NewTerminators.push_back(I); + NewTerminators.push_back(&*I); for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(), E = Terminators.end(); I != E; ++I) { @@ -784,17 +834,16 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { NMBB->addSuccessor(Succ); if (!NMBB->isLayoutSuccessor(Succ)) { Cond.clear(); - MF->getSubtarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond, - dl); + TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL); if (Indexes) { for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end(); I != E; ++I) { // Some instructions may have been moved to NMBB by updateTerminator(), // so we first remove any instruction that already has an index. - if (Indexes->hasIndex(I)) - Indexes->removeMachineInstrFromMaps(I); - Indexes->insertMachineInstrInMaps(I); + if (Indexes->hasIndex(&*I)) + Indexes->removeMachineInstrFromMaps(&*I); + Indexes->insertMachineInstrInMaps(&*I); } } } @@ -808,9 +857,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { i->getOperand(ni+1).setMBB(NMBB); // Inherit live-ins from the successor - for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(), - E = Succ->livein_end(); I != E; ++I) - NMBB->addLiveIn(*I); + for (const auto &LI : Succ->liveins()) + NMBB->addLiveIn(LI); // Update LiveVariables. 
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); @@ -822,7 +870,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false)) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) - LV->getVarInfo(Reg).Kills.push_back(I); + LV->getVarInfo(Reg).Kills.push_back(&*I); DEBUG(dbgs() << "Restored terminator kill: " << *I); break; } @@ -834,10 +882,10 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (LIS) { // After splitting the edge and updating SlotIndexes, live intervals may be // in one of two situations, depending on whether this block was the last in - // the function. If the original block was the last in the function, all live - // intervals will end prior to the beginning of the new split block. If the - // original block was not at the end of the function, all live intervals will - // extend to the end of the new split block. + // the function. If the original block was the last in the function, all + // live intervals will end prior to the beginning of the new split block. If + // the original block was not at the end of the function, all live intervals + // will extend to the end of the new split block. bool isLastMBB = std::next(MachineFunction::iterator(NMBB)) == getParent()->end(); @@ -861,7 +909,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { LiveInterval &LI = LIS->getInterval(Reg); VNInfo *VNI = LI.getVNInfoAt(PrevIndex); - assert(VNI && "PHI sources should be live out of their predecessors."); + assert(VNI && + "PHI sources should be live out of their predecessors."); LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI)); } } @@ -941,7 +990,7 @@ static void unbundleSingleMI(MachineInstr *MI) { MachineBasicBlock::instr_iterator MachineBasicBlock::erase(MachineBasicBlock::instr_iterator I) { - unbundleSingleMI(I); + unbundleSingleMI(&*I); return Insts.erase(I); } @@ -964,25 +1013,22 @@ MachineBasicBlock::insert(instr_iterator I, MachineInstr *MI) { return Insts.insert(I, MI); } -/// removeFromParent - This method unlinks 'this' from the containing function, -/// and returns it, but does not delete it. +/// This method unlinks 'this' from the containing function, and returns it, but +/// does not delete it. MachineBasicBlock *MachineBasicBlock::removeFromParent() { assert(getParent() && "Not embedded in a function!"); getParent()->remove(this); return this; } - -/// eraseFromParent - This method unlinks 'this' from the containing function, -/// and deletes it. +/// This method unlinks 'this' from the containing function, and deletes it. void MachineBasicBlock::eraseFromParent() { assert(getParent() && "Not embedded in a function!"); getParent()->erase(this); } - -/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to -/// 'Old', change the code and CFG so that it branches to 'New' instead. +/// Given a machine basic block that branched to 'Old', change the code and CFG +/// so that it branches to 'New' instead. void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New) { assert(Old != New && "Cannot replace self with self!"); @@ -1004,46 +1050,44 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, replaceSuccessor(Old, New); } -/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the -/// CFG to be inserted. 
If we have proven that MBB can only branch to DestA and -/// DestB, remove any other MBB successors from the CFG. DestA and DestB can be -/// null. +/// Various pieces of code can cause excess edges in the CFG to be inserted. If +/// we have proven that MBB can only branch to DestA and DestB, remove any other +/// MBB successors from the CFG. DestA and DestB can be null. /// /// Besides DestA and DestB, retain other edges leading to LandingPads /// (currently there can be only one; we don't check or require that here). /// Note it is possible that DestA and/or DestB are LandingPads. bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, MachineBasicBlock *DestB, - bool isCond) { + bool IsCond) { // The values of DestA and DestB frequently come from a call to the // 'TargetInstrInfo::AnalyzeBranch' method. We take our meaning of the initial // values from there. // // 1. If both DestA and DestB are null, then the block ends with no branches // (it falls through to its successor). - // 2. If DestA is set, DestB is null, and isCond is false, then the block ends + // 2. If DestA is set, DestB is null, and IsCond is false, then the block ends // with only an unconditional branch. - // 3. If DestA is set, DestB is null, and isCond is true, then the block ends + // 3. If DestA is set, DestB is null, and IsCond is true, then the block ends // with a conditional branch that falls through to a successor (DestB). - // 4. If DestA and DestB is set and isCond is true, then the block ends with a + // 4. If DestA and DestB is set and IsCond is true, then the block ends with a // conditional branch followed by an unconditional branch. DestA is the // 'true' destination and DestB is the 'false' destination. bool Changed = false; - MachineFunction::iterator FallThru = - std::next(MachineFunction::iterator(this)); + MachineFunction::iterator FallThru = std::next(getIterator()); if (!DestA && !DestB) { // Block falls through to successor. - DestA = FallThru; - DestB = FallThru; + DestA = &*FallThru; + DestB = &*FallThru; } else if (DestA && !DestB) { - if (isCond) + if (IsCond) // Block ends in conditional jump that falls through to successor. - DestB = FallThru; + DestB = &*FallThru; } else { - assert(DestA && DestB && isCond && + assert(DestA && DestB && IsCond && "CFG in a bad state. Cannot correct CFG edges"); } @@ -1054,7 +1098,7 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, while (SI != succ_end()) { const MachineBasicBlock *MBB = *SI; if (!SeenMBBs.insert(MBB).second || - (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) { + (MBB != DestA && MBB != DestB && !MBB->isEHPad())) { // This is a superfluous edge, remove it. SI = removeSuccessor(SI); Changed = true; @@ -1063,11 +1107,13 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, } } + if (Changed) + normalizeSuccProbs(); return Changed; } -/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping -/// any DBG_VALUE instructions. Return UnknownLoc if there is none. +/// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE +/// instructions. Return UnknownLoc if there is none. DebugLoc MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { DebugLoc DL; @@ -1083,40 +1129,55 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { return DL; } -/// getSuccWeight - Return weight of the edge from this block to MBB. 
-///
-uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const {
-  if (Weights.empty())
-    return 0;
-
-  return *getWeightIterator(Succ);
+/// Return probability of the edge from this block to MBB.
+BranchProbability
+MachineBasicBlock::getSuccProbability(const_succ_iterator Succ) const {
+  if (Probs.empty())
+    return BranchProbability(1, succ_size());
+
+  const auto &Prob = *getProbabilityIterator(Succ);
+  if (Prob.isUnknown()) {
+    // For unknown probabilities, collect the sum of all known ones, and evenly
+    // distribute the complemental of the sum to each unknown probability.
+    unsigned KnownProbNum = 0;
+    auto Sum = BranchProbability::getZero();
+    for (auto &P : Probs) {
+      if (!P.isUnknown()) {
+        Sum += P;
+        KnownProbNum++;
+      }
+    }
+    return Sum.getCompl() / (Probs.size() - KnownProbNum);
+  } else
+    return Prob;
 }
 
-/// Set successor weight of a given iterator.
-void MachineBasicBlock::setSuccWeight(succ_iterator I, uint32_t weight) {
-  if (Weights.empty())
+/// Set successor probability of a given iterator.
+void MachineBasicBlock::setSuccProbability(succ_iterator I,
+                                           BranchProbability Prob) {
+  assert(!Prob.isUnknown());
+  if (Probs.empty())
     return;
 
-  *getWeightIterator(I) = weight;
+  *getProbabilityIterator(I) = Prob;
 }
 
-/// getWeightIterator - Return wight iterator corresonding to the I successor
-/// iterator
-MachineBasicBlock::weight_iterator MachineBasicBlock::
-getWeightIterator(MachineBasicBlock::succ_iterator I) {
-  assert(Weights.size() == Successors.size() && "Async weight list!");
-  size_t index = std::distance(Successors.begin(), I);
-  assert(index < Weights.size() && "Not a current successor!");
-  return Weights.begin() + index;
+/// Return probability iterator corresponding to the I successor iterator
+MachineBasicBlock::const_probability_iterator
+MachineBasicBlock::getProbabilityIterator(
+    MachineBasicBlock::const_succ_iterator I) const {
+  assert(Probs.size() == Successors.size() && "Async probability list!");
+  const size_t index = std::distance(Successors.begin(), I);
+  assert(index < Probs.size() && "Not a current successor!");
+  return Probs.begin() + index;
 }
 
-/// getWeightIterator - Return wight iterator corresonding to the I successor
-/// iterator
-MachineBasicBlock::const_weight_iterator MachineBasicBlock::
-getWeightIterator(MachineBasicBlock::const_succ_iterator I) const {
-  assert(Weights.size() == Successors.size() && "Async weight list!");
+/// Return probability iterator corresponding to the I successor iterator.
+MachineBasicBlock::probability_iterator
+MachineBasicBlock::getProbabilityIterator(MachineBasicBlock::succ_iterator I) {
+  assert(Probs.size() == Successors.size() && "Async probability list!");
   const size_t index = std::distance(Successors.begin(), I);
-  assert(index < Weights.size() && "Not a current successor!");
-  return Weights.begin() + index;
+  assert(index < Probs.size() && "Not a current successor!");
+  return Probs.begin() + index;
 }
 
 /// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed
@@ -1138,33 +1199,33 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
     do {
       --I;
-      MachineOperandIteratorBase::PhysRegInfo Analysis =
+      MachineOperandIteratorBase::PhysRegInfo Info =
         ConstMIOperands(I).analyzePhysReg(Reg, TRI);
-      if (Analysis.Defines)
-        // Outputs happen after inputs so they take precedence if both are
-        // present.
-        return Analysis.DefinesDead ? LQR_Dead : LQR_Live;
+      // Defs happen after uses so they take precedence if both are present.
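getSuccProbability above treats an unknown entry as an even share of whatever probability the known entries leave unclaimed. The same rule with plain doubles, where a negative value stands in for an unknown probability:

#include <cstdio>
#include <vector>

// Illustrative only: the distribution rule used by getSuccProbability, with
// probabilities as plain doubles instead of BranchProbability. A negative
// entry stands in for an "unknown" probability.
static double successorProbability(const std::vector<double> &Probs,
                                   size_t Idx) {
  if (Probs[Idx] >= 0.0)
    return Probs[Idx];
  // Sum the known probabilities and split what is left evenly among the
  // unknown ones.
  double Sum = 0.0;
  size_t KnownProbNum = 0;
  for (double P : Probs)
    if (P >= 0.0) {
      Sum += P;
      ++KnownProbNum;
    }
  return (1.0 - Sum) / (double)(Probs.size() - KnownProbNum);
}

int main() {
  // One successor is known to take 50%; the two unknown ones get 25% each.
  std::vector<double> Probs = {0.5, -1.0, -1.0};
  std::printf("%.2f %.2f %.2f\n", successorProbability(Probs, 0),
              successorProbability(Probs, 1), successorProbability(Probs, 2));
}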
- if (Analysis.Kills || Analysis.Clobbers) - // Register killed, so isn't live. + // Register is dead after a dead def of the full register. + if (Info.DeadDef) return LQR_Dead; - - else if (Analysis.ReadsOverlap) - // Defined or read without a previous kill - live. - return Analysis.Reads ? LQR_Live : LQR_OverlappingLive; - + // Register is (at least partially) live after a def. + if (Info.Defined) + return LQR_Live; + // Register is dead after a full kill or clobber and no def. + if (Info.Killed || Info.Clobbered) + return LQR_Dead; + // Register must be live if we read it. + if (Info.Read) + return LQR_Live; } while (I != begin() && --N > 0); } // Did we get to the start of the block? if (I == begin()) { // If so, the register's state is definitely defined by the live-in state. - for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); - RAI.isValid(); ++RAI) { + for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); RAI.isValid(); + ++RAI) if (isLiveIn(*RAI)) - return (*RAI == Reg) ? LQR_Live : LQR_OverlappingLive; - } + return LQR_Live; return LQR_Dead; } @@ -1176,16 +1237,14 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, // If this is the last insn in the block, don't search forwards. if (I != end()) { for (++I; I != end() && N > 0; ++I, --N) { - MachineOperandIteratorBase::PhysRegInfo Analysis = + MachineOperandIteratorBase::PhysRegInfo Info = ConstMIOperands(I).analyzePhysReg(Reg, TRI); - if (Analysis.ReadsOverlap) - // Used, therefore must have been live. - return (Analysis.Reads) ? - LQR_Live : LQR_OverlappingLive; - - else if (Analysis.Clobbers || Analysis.Defines) - // Defined (but not read) therefore cannot have been live. + // Register is live when we read it here. + if (Info.Read) + return LQR_Live; + // Register is dead if we can fully overwrite or clobber it here. + if (Info.FullyDefined || Info.Clobbered) return LQR_Dead; } } @@ -1193,3 +1252,17 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, // At this point we have no idea of the liveness of the register. return LQR_Unknown; } + +const uint32_t * +MachineBasicBlock::getBeginClobberMask(const TargetRegisterInfo *TRI) const { + // EH funclet entry does not preserve any registers. + return isEHFuncletEntry() ? TRI->getNoPreservedMask() : nullptr; +} + +const uint32_t * +MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const { + // If we see a return block with successors, this must be a funclet return, + // which does not preserve any registers. If there are no successors, we don't + // care what kind of return it is, putting a mask after it is a no-op. + return isReturnBlock() && !succ_empty() ? 
TRI->getNoPreservedMask() : nullptr; +} diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 9151d99089d6..9119e31bdb3c 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -57,7 +57,7 @@ struct GraphTraits<MachineBlockFrequencyInfo *> { static inline const NodeType *getEntryNode(const MachineBlockFrequencyInfo *G) { - return G->getFunction()->begin(); + return &G->getFunction()->front(); } static ChildIteratorType child_begin(const NodeType *N) { @@ -143,7 +143,7 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); if (!MBFI) MBFI.reset(new ImplType); - MBFI->doFunction(&F, &MBPI, &MLI); + MBFI->calculate(F, MBPI, MLI); #ifndef NDEBUG if (ViewMachineBlockFreqPropagationDAG != GVDT_None) { view(); diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 2969bad4ff98..f5e305645011 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -51,7 +51,7 @@ using namespace llvm; #define DEBUG_TYPE "block-placement" STATISTIC(NumCondBranches, "Number of conditional branches"); -STATISTIC(NumUncondBranches, "Number of uncondittional branches"); +STATISTIC(NumUncondBranches, "Number of unconditional branches"); STATISTIC(CondBranchTakenFreq, "Potential frequency of taking conditional branches"); STATISTIC(UncondBranchTakenFreq, @@ -62,6 +62,11 @@ static cl::opt<unsigned> AlignAllBlock("align-all-blocks", "blocks in the function."), cl::init(0), cl::Hidden); +static cl::opt<unsigned> + AlignAllLoops("align-all-loops", + cl::desc("Force the alignment of all loops in the function."), + cl::init(0), cl::Hidden); + // FIXME: Find a good default for this flag and remove the flag. static cl::opt<unsigned> ExitBlockBias( "block-placement-exit-block-bias", @@ -81,6 +86,29 @@ static cl::opt<unsigned> OutlineOptionalThreshold( "instruction count below this threshold"), cl::init(4), cl::Hidden); +static cl::opt<unsigned> LoopToColdBlockRatio( + "loop-to-cold-block-ratio", + cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " + "(frequency of block) is greater than this ratio"), + cl::init(5), cl::Hidden); + +static cl::opt<bool> + PreciseRotationCost("precise-rotation-cost", + cl::desc("Model the cost of loop rotation more " + "precisely by using profile data."), + cl::init(false), cl::Hidden); + +static cl::opt<unsigned> MisfetchCost( + "misfetch-cost", + cl::desc("Cost that models the probablistic risk of an instruction " + "misfetch due to a jump comparing to falling through, whose cost " + "is zero."), + cl::init(1), cl::Hidden); + +static cl::opt<unsigned> JumpInstCost("jump-inst-cost", + cl::desc("Cost of jump instructions."), + cl::init(1), cl::Hidden); + namespace { class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. 
@@ -246,9 +274,12 @@ class MachineBlockPlacement : public MachineFunctionPass {
                                      const BlockFilterSet &LoopBlockSet);
   MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L,
                                       const BlockFilterSet &LoopBlockSet);
+  BlockFilterSet collectLoopBlockSet(MachineFunction &F, MachineLoop &L);
   void buildLoopChains(MachineFunction &F, MachineLoop &L);
   void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
                   const BlockFilterSet &LoopBlockSet);
+  void rotateLoopWithProfile(BlockChain &LoopChain, MachineLoop &L,
+                             const BlockFilterSet &LoopBlockSet);
   void buildCFGChains(MachineFunction &F);
 
 public:
@@ -354,31 +385,56 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
   const BranchProbability HotProb(4, 5); // 80%
 
   MachineBasicBlock *BestSucc = nullptr;
-  // FIXME: Due to the performance of the probability and weight routines in
-  // the MBPI analysis, we manually compute probabilities using the edge
-  // weights. This is suboptimal as it means that the somewhat subtle
-  // definition of edge weight semantics is encoded here as well. We should
-  // improve the MBPI interface to efficiently support query patterns such as
-  // this.
-  uint32_t BestWeight = 0;
-  uint32_t WeightScale = 0;
-  uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
-  DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+  auto BestProb = BranchProbability::getZero();
+
+  // Adjust edge probabilities by excluding edges pointing to blocks that are
+  // either not in BlockFilter or are already in the current chain. Consider the
+  // following CFG:
+  //
+  //     --->A
+  //     |  / \
+  //     | B   C
+  //     |  \ / \
+  //     ----D   E
+  //
+  // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after
+  // A->C is chosen as a fall-through, D won't be selected as a successor of C
+  // due to CFG constraint (the probability of C->D is not greater than
+  // HotProb). If we exclude E that is not in BlockFilter when calculating the
+  // probability of C->D, D will be selected and we will get A C D B as the
+  // layout of this loop.
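The comment above describes rescaling the remaining edges once filtered or already-placed successors are dropped from the sum. A small numeric sketch of that adjustment for block C of the CFG in the comment, using plain doubles instead of BranchProbability:

#include <cstdio>
#include <vector>

// Illustrative only: the probability adjustment described in the comment,
// with plain doubles. Excluded edges (already placed, or outside the block
// filter) are dropped and the remaining edges are rescaled.
int main() {
  // Successors of C in the example CFG: D and E, each with probability 0.5.
  struct Edge { const char *Succ; double Prob; bool Skipped; };
  std::vector<Edge> Edges = {{"D", 0.5, false}, {"E", 0.5, true}};

  double AdjustedSumProb = 1.0;
  for (const Edge &E : Edges)
    if (E.Skipped)
      AdjustedSumProb -= E.Prob;

  const double HotProb = 0.8; // same 80% threshold as selectBestSuccessor
  for (const Edge &E : Edges) {
    if (E.Skipped)
      continue;
    double SuccProb = E.Prob / AdjustedSumProb;
    // With E excluded, C->D is rescaled from 0.5 to 1.0 and clears HotProb,
    // so D may be placed right after C.
    std::printf("C->%s adjusted to %.2f (%s HotProb)\n", E.Succ, SuccProb,
                SuccProb >= HotProb ? "clears" : "below");
  }
}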
+ auto AdjustedSumProb = BranchProbability::getOne(); + SmallVector<MachineBasicBlock *, 4> Successors; for (MachineBasicBlock *Succ : BB->successors()) { - if (BlockFilter && !BlockFilter->count(Succ)) - continue; - BlockChain &SuccChain = *BlockToChain[Succ]; - if (&SuccChain == &Chain) { - DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Already merged!\n"); - continue; - } - if (Succ != *SuccChain.begin()) { - DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n"); - continue; + bool SkipSucc = false; + if (BlockFilter && !BlockFilter->count(Succ)) { + SkipSucc = true; + } else { + BlockChain *SuccChain = BlockToChain[Succ]; + if (SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(Succ) + << " -> Already merged!\n"); + SkipSucc = true; + } else if (Succ != *SuccChain->begin()) { + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n"); + continue; + } } + if (SkipSucc) + AdjustedSumProb -= MBPI->getEdgeProbability(BB, Succ); + else + Successors.push_back(Succ); + } - uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ); - BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); + for (MachineBasicBlock *Succ : Successors) { + BranchProbability SuccProb; + uint32_t SuccProbN = MBPI->getEdgeProbability(BB, Succ).getNumerator(); + uint32_t SuccProbD = AdjustedSumProb.getNumerator(); + if (SuccProbN >= SuccProbD) + SuccProb = BranchProbability::getOne(); + else + SuccProb = BranchProbability(SuccProbN, SuccProbD); // If we outline optional branches, look whether Succ is unavoidable, i.e. // dominates all terminators of the MachineFunction. If it does, other @@ -406,6 +462,7 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, // Only consider successors which are either "hot", or wouldn't violate // any CFG constraints. + BlockChain &SuccChain = *BlockToChain[Succ]; if (SuccChain.LoopPredecessors != 0) { if (SuccProb < HotProb) { DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb @@ -415,8 +472,9 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, // Make sure that a hot successor doesn't have a globally more // important predecessor. + auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ); BlockFrequency CandidateEdgeFreq = - MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); + MBFI->getBlockFreq(BB) * RealSuccProb * HotProb.getCompl(); bool BadCFGConflict = false; for (MachineBasicBlock *Pred : Succ->predecessors()) { if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) || @@ -440,10 +498,10 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, << " (prob)" << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "") << "\n"); - if (BestSucc && BestWeight >= SuccWeight) + if (BestSucc && BestProb >= SuccProb) continue; BestSucc = Succ; - BestWeight = SuccWeight; + BestProb = SuccProb; } return BestSucc; } @@ -505,14 +563,14 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( const BlockFilterSet *BlockFilter) { for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E; ++I) { - if (BlockFilter && !BlockFilter->count(I)) + if (BlockFilter && !BlockFilter->count(&*I)) continue; - if (BlockToChain[I] != &PlacedChain) { + if (BlockToChain[&*I] != &PlacedChain) { PrevUnplacedBlockIt = I; // Now select the head of the chain to which the unplaced block belongs // as the block to place. 
This will force the entire chain to be placed, // and satisfies the requirements of merging chains. - return *BlockToChain[I]->begin(); + return *BlockToChain[&*I]->begin(); } } return nullptr; @@ -672,13 +730,8 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, MachineBasicBlock *OldExitingBB = ExitingBB; BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq; bool HasLoopingSucc = false; - // FIXME: Due to the performance of the probability and weight routines in - // the MBPI analysis, we use the internal weights and manually compute the - // probabilities to avoid quadratic behavior. - uint32_t WeightScale = 0; - uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale); for (MachineBasicBlock *Succ : MBB->successors()) { - if (Succ->isLandingPad()) + if (Succ->isEHPad()) continue; if (Succ == MBB) continue; @@ -690,10 +743,10 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, continue; } - uint32_t SuccWeight = MBPI->getEdgeWeight(MBB, Succ); + auto SuccProb = MBPI->getEdgeProbability(MBB, Succ); if (LoopBlockSet.count(Succ)) { DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> " - << getBlockName(Succ) << " (" << SuccWeight << ")\n"); + << getBlockName(Succ) << " (" << SuccProb << ")\n"); HasLoopingSucc = true; continue; } @@ -705,7 +758,6 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, BlocksExitingToOuterLoop.insert(MBB); } - BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb; DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] ("; @@ -791,6 +843,188 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()); } +/// \brief Attempt to rotate a loop based on profile data to reduce branch cost. +/// +/// With profile data, we can determine the cost in terms of missed fall through +/// opportunities when rotating a loop chain and select the best rotation. +/// Basically, there are three kinds of cost to consider for each rotation: +/// 1. The possibly missed fall through edge (if it exists) from BB out of +/// the loop to the loop header. +/// 2. The possibly missed fall through edges (if they exist) from the loop +/// exits to BB out of the loop. +/// 3. The missed fall through edge (if it exists) from the last BB to the +/// first BB in the loop chain. +/// Therefore, the cost for a given rotation is the sum of costs listed above. +/// We select the best rotation with the smallest cost. +void MachineBlockPlacement::rotateLoopWithProfile( + BlockChain &LoopChain, MachineLoop &L, const BlockFilterSet &LoopBlockSet) { + auto HeaderBB = L.getHeader(); + auto HeaderIter = std::find(LoopChain.begin(), LoopChain.end(), HeaderBB); + auto RotationPos = LoopChain.end(); + + BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency(); + + // A utility lambda that scales up a block frequency by dividing it by a + // branch probability which is the reciprocal of the scale. + auto ScaleBlockFrequency = [](BlockFrequency Freq, + unsigned Scale) -> BlockFrequency { + if (Scale == 0) + return 0; + // Use operator / between BlockFrequency and BranchProbability to implement + // saturating multiplication. + return Freq / BranchProbability(1, Scale); + }; + + // Compute the cost of the missed fall-through edge to the loop header if the + // chain head is not the loop header. 
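The ScaleBlockFrequency lambda above multiplies a frequency by Scale by dividing it by the probability 1/Scale. A standalone model of that behaviour in plain 64-bit arithmetic; the saturation bound used here is an assumption, since BlockFrequency's exact limit is not shown in this hunk.

#include <cstdint>
#include <cstdio>

// Illustrative only: dividing a frequency by the probability 1/Scale is the
// same as multiplying it by Scale; saturate instead of wrapping on overflow.
static uint64_t scaleBlockFrequency(uint64_t Freq, unsigned Scale) {
  if (Scale == 0)
    return 0;
  if (Freq > UINT64_MAX / Scale)
    return UINT64_MAX;
  return Freq * Scale;
}

int main() {
  // A block frequency of 1000 scaled by MisfetchCost == 1 (the option default
  // above) stays 1000; scaling by 4 gives 4000.
  std::printf("%llu %llu\n",
              (unsigned long long)scaleBlockFrequency(1000, 1),
              (unsigned long long)scaleBlockFrequency(1000, 4));
}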
As we only consider natural loops with + // single header, this computation can be done only once. + BlockFrequency HeaderFallThroughCost(0); + for (auto *Pred : HeaderBB->predecessors()) { + BlockChain *PredChain = BlockToChain[Pred]; + if (!LoopBlockSet.count(Pred) && + (!PredChain || Pred == *std::prev(PredChain->end()))) { + auto EdgeFreq = + MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB); + auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost); + // If the predecessor has only an unconditional jump to the header, we + // need to consider the cost of this jump. + if (Pred->succ_size() == 1) + FallThruCost += ScaleBlockFrequency(EdgeFreq, JumpInstCost); + HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost); + } + } + + // Here we collect all exit blocks in the loop, and for each exit we find out + // its hottest exit edge. For each loop rotation, we define the loop exit cost + // as the sum of frequencies of exit edges we collect here, excluding the exit + // edge from the tail of the loop chain. + SmallVector<std::pair<MachineBasicBlock *, BlockFrequency>, 4> ExitsWithFreq; + for (auto BB : LoopChain) { + auto LargestExitEdgeProb = BranchProbability::getZero(); + for (auto *Succ : BB->successors()) { + BlockChain *SuccChain = BlockToChain[Succ]; + if (!LoopBlockSet.count(Succ) && + (!SuccChain || Succ == *SuccChain->begin())) { + auto SuccProb = MBPI->getEdgeProbability(BB, Succ); + LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb); + } + } + if (LargestExitEdgeProb > BranchProbability::getZero()) { + auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb; + ExitsWithFreq.emplace_back(BB, ExitFreq); + } + } + + // In this loop we iterate every block in the loop chain and calculate the + // cost assuming the block is the head of the loop chain. When the loop ends, + // we should have found the best candidate as the loop chain's head. + for (auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()), + EndIter = LoopChain.end(); + Iter != EndIter; Iter++, TailIter++) { + // TailIter is used to track the tail of the loop chain if the block we are + // checking (pointed by Iter) is the head of the chain. + if (TailIter == LoopChain.end()) + TailIter = LoopChain.begin(); + + auto TailBB = *TailIter; + + // Calculate the cost by putting this BB to the top. + BlockFrequency Cost = 0; + + // If the current BB is the loop header, we need to take into account the + // cost of the missed fall through edge from outside of the loop to the + // header. + if (Iter != HeaderIter) + Cost += HeaderFallThroughCost; + + // Collect the loop exit cost by summing up frequencies of all exit edges + // except the one from the chain tail. + for (auto &ExitWithFreq : ExitsWithFreq) + if (TailBB != ExitWithFreq.first) + Cost += ExitWithFreq.second; + + // The cost of breaking the once fall-through edge from the tail to the top + // of the loop chain. Here we need to consider three cases: + // 1. If the tail node has only one successor, then we will get an + // additional jmp instruction. So the cost here is (MisfetchCost + + // JumpInstCost) * tail node frequency. + // 2. If the tail node has two successors, then we may still get an + // additional jmp instruction if the layout successor after the loop + // chain is not its CFG successor. Note that the more frequently executed + // jmp instruction will be put ahead of the other one. 
Assume the + // frequency of those two branches are x and y, where x is the frequency + // of the edge to the chain head, then the cost will be + // (x * MisfetechCost + min(x, y) * JumpInstCost) * tail node frequency. + // 3. If the tail node has more than two successors (this rarely happens), + // we won't consider any additional cost. + if (TailBB->isSuccessor(*Iter)) { + auto TailBBFreq = MBFI->getBlockFreq(TailBB); + if (TailBB->succ_size() == 1) + Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(), + MisfetchCost + JumpInstCost); + else if (TailBB->succ_size() == 2) { + auto TailToHeadProb = MBPI->getEdgeProbability(TailBB, *Iter); + auto TailToHeadFreq = TailBBFreq * TailToHeadProb; + auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2) + ? TailBBFreq * TailToHeadProb.getCompl() + : TailToHeadFreq; + Cost += ScaleBlockFrequency(TailToHeadFreq, MisfetchCost) + + ScaleBlockFrequency(ColderEdgeFreq, JumpInstCost); + } + } + + DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockNum(*Iter) + << " to the top: " << Cost.getFrequency() << "\n"); + + if (Cost < SmallestRotationCost) { + SmallestRotationCost = Cost; + RotationPos = Iter; + } + } + + if (RotationPos != LoopChain.end()) { + DEBUG(dbgs() << "Rotate loop by making " << getBlockNum(*RotationPos) + << " to the top\n"); + std::rotate(LoopChain.begin(), RotationPos, LoopChain.end()); + } +} + +/// \brief Collect blocks in the given loop that are to be placed. +/// +/// When profile data is available, exclude cold blocks from the returned set; +/// otherwise, collect all blocks in the loop. +MachineBlockPlacement::BlockFilterSet +MachineBlockPlacement::collectLoopBlockSet(MachineFunction &F, MachineLoop &L) { + BlockFilterSet LoopBlockSet; + + // Filter cold blocks off from LoopBlockSet when profile data is available. + // Collect the sum of frequencies of incoming edges to the loop header from + // outside. If we treat the loop as a super block, this is the frequency of + // the loop. Then for each block in the loop, we calculate the ratio between + // its frequency and the frequency of the loop block. When it is too small, + // don't add it to the loop chain. If there are outer loops, then this block + // will be merged into the first outer loop chain for which this block is not + // cold anymore. This needs precise profile data and we only do this when + // profile data is available. + if (F.getFunction()->getEntryCount()) { + BlockFrequency LoopFreq(0); + for (auto LoopPred : L.getHeader()->predecessors()) + if (!L.contains(LoopPred)) + LoopFreq += MBFI->getBlockFreq(LoopPred) * + MBPI->getEdgeProbability(LoopPred, L.getHeader()); + + for (MachineBasicBlock *LoopBB : L.getBlocks()) { + auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency(); + if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio) + continue; + LoopBlockSet.insert(LoopBB); + } + } else + LoopBlockSet.insert(L.block_begin(), L.block_end()); + + return LoopBlockSet; +} + /// \brief Forms basic block chains from the natural loop structures. /// /// These chains are designed to preserve the existing *structure* of the code @@ -805,19 +1039,27 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, buildLoopChains(F, *InnerLoop); SmallVector<MachineBasicBlock *, 16> BlockWorkList; - BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); + BlockFilterSet LoopBlockSet = collectLoopBlockSet(F, L); + + // Check if we have profile data for this function. 
If yes, we will rotate + // this loop by modeling costs more precisely which requires the profile data + // for better layout. + bool RotateLoopWithProfile = + PreciseRotationCost && F.getFunction()->getEntryCount(); // First check to see if there is an obviously preferable top block for the // loop. This will default to the header, but may end up as one of the // predecessors to the header if there is one which will result in strictly // fewer branches in the loop body. - MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet); + // When we use profile data to rotate the loop, this is unnecessary. + MachineBasicBlock *LoopTop = + RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet); // If we selected just the header for the loop top, look for a potentially // profitable exit block in the event that rotating the loop can eliminate // branches by placing an exit edge at the bottom. MachineBasicBlock *ExitingBB = nullptr; - if (LoopTop == L.getHeader()) + if (!RotateLoopWithProfile && LoopTop == L.getHeader()) ExitingBB = findBestLoopExit(F, L, LoopBlockSet); BlockChain &LoopChain = *BlockToChain[LoopTop]; @@ -828,7 +1070,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, SmallPtrSet<BlockChain *, 4> UpdatedPreds; assert(LoopChain.LoopPredecessors == 0); UpdatedPreds.insert(&LoopChain); - for (MachineBasicBlock *LoopBB : L.getBlocks()) { + + for (MachineBasicBlock *LoopBB : LoopBlockSet) { BlockChain &Chain = *BlockToChain[LoopBB]; if (!UpdatedPreds.insert(&Chain).second) continue; @@ -848,7 +1091,11 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, } buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet); - rotateLoop(LoopChain, ExitingBB, LoopBlockSet); + + if (RotateLoopWithProfile) + rotateLoopWithProfile(LoopChain, L, LoopBlockSet); + else + rotateLoop(LoopChain, ExitingBB, LoopBlockSet); DEBUG({ // Crash at the end so we get all of the debugging output first. @@ -889,7 +1136,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // the assumptions of the remaining algorithm. SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { - MachineBasicBlock *BB = FI; + MachineBasicBlock *BB = &*FI; BlockChain *Chain = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); // Also, merge any blocks which we cannot reason about and must preserve @@ -900,8 +1147,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) break; - MachineFunction::iterator NextFI(std::next(FI)); - MachineBasicBlock *NextBB = NextFI; + MachineFunction::iterator NextFI = std::next(FI); + MachineBasicBlock *NextBB = &*NextFI; // Ensure that the layout successor is a viable block, as we know that // fallthrough is a possibility. assert(NextFI != FE && "Can't fallthrough past the last block."); @@ -1004,7 +1251,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Update the terminator of the previous block. 
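To make the rotation cost model in rotateLoopWithProfile above concrete, here is a minimal standalone sketch of the case-2 tail cost from the comment, (x * MisfetchCost + min(x, y) * JumpInstCost) * tail frequency. All numbers are made up for illustration, plain doubles stand in for BlockFrequency/BranchProbability, and the cost settings stand in for the MisfetchCost and JumpInstCost options referenced above; the pass itself reads the real values from MBFI and MBPI.

#include <algorithm>
#include <cstdio>

int main() {
  // Hypothetical inputs: the tail block runs 1000 times; 70% of the time it
  // branches back to the chain head (x), 30% of the time it leaves (y).
  const double TailFreq = 1000.0;
  const double X = 0.7, Y = 0.3;
  // Assumed per-event costs, standing in for MisfetchCost / JumpInstCost.
  const unsigned MisfetchCost = 1, JumpInstCost = 1;

  // Case 2 of the comment: the hotter branch is laid out first, so only the
  // colder edge pays the extra jump instruction.
  double Cost = (X * MisfetchCost + std::min(X, Y) * JumpInstCost) * TailFreq;
  std::printf("rotation cost for this candidate head: %.1f\n", Cost);
  return 0;
}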
if (ChainBB == *FunctionChain.begin()) continue; - MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(ChainBB)); + MachineBasicBlock *PrevBB = &*std::prev(MachineFunction::iterator(ChainBB)); // FIXME: It would be awesome of updateTerminator would just return rather // than assert when the branch cannot be analyzed in order to remove this @@ -1035,14 +1282,16 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { } // If PrevBB has a two-way branch, try to re-order the branches - // such that we branch to the successor with higher weight first. + // such that we branch to the successor with higher probability first. if (TBB && !Cond.empty() && FBB && - MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) && + MBPI->getEdgeProbability(PrevBB, FBB) > + MBPI->getEdgeProbability(PrevBB, TBB) && !TII->ReverseBranchCondition(Cond)) { DEBUG(dbgs() << "Reverse order of the two branches: " << getBlockName(PrevBB) << "\n"); - DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB) - << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n"); + DEBUG(dbgs() << " Edge probability: " + << MBPI->getEdgeProbability(PrevBB, FBB) << " vs " + << MBPI->getEdgeProbability(PrevBB, TBB) << "\n"); DebugLoc dl; // FIXME: this is nowhere TII->RemoveBranch(*PrevBB); TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); @@ -1064,13 +1313,14 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. + // FIXME: Use Function::optForSize(). if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) return; if (FunctionChain.begin() == FunctionChain.end()) return; // Empty chain. const BranchProbability ColdProb(1, 5); // 20% - BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); + BlockFrequency EntryFreq = MBFI->getBlockFreq(&F.front()); BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; for (MachineBasicBlock *ChainBB : FunctionChain) { if (ChainBB == *FunctionChain.begin()) @@ -1084,6 +1334,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (!L) continue; + if (AlignAllLoops) { + ChainBB->setAlignment(AlignAllLoops); + continue; + } + unsigned Align = TLI->getPrefLoopAlignment(L); if (!Align) continue; // Don't care about loop alignment. @@ -1224,4 +1479,3 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { return false; } - diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 6fbc2be70486..cf6d4018cb70 100644 --- a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -28,91 +28,48 @@ char MachineBranchProbabilityInfo::ID = 0; void MachineBranchProbabilityInfo::anchor() { } -uint32_t MachineBranchProbabilityInfo:: -getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { - // First we compute the sum with 64-bits of precision, ensuring that cannot - // overflow by bounding the number of weights considered. Hopefully no one - // actually needs 2^32 successors. 
- assert(MBB->succ_size() < UINT32_MAX); - uint64_t Sum = 0; - Scale = 1; - for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, I); - Sum += Weight; - } - - // If the computed sum fits in 32-bits, we're done. - if (Sum <= UINT32_MAX) - return Sum; - - // Otherwise, compute the scale necessary to cause the weights to fit, and - // re-sum with that scale applied. - assert((Sum / UINT32_MAX) < UINT32_MAX); - Scale = (Sum / UINT32_MAX) + 1; - Sum = 0; - for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, I); - Sum += Weight / Scale; - } - assert(Sum <= UINT32_MAX); - return Sum; -} - -uint32_t MachineBranchProbabilityInfo:: -getEdgeWeight(const MachineBasicBlock *Src, - MachineBasicBlock::const_succ_iterator Dst) const { - uint32_t Weight = Src->getSuccWeight(Dst); - if (!Weight) - return DEFAULT_WEIGHT; - return Weight; +BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( + const MachineBasicBlock *Src, + MachineBasicBlock::const_succ_iterator Dst) const { + return Src->getSuccProbability(Dst); } -uint32_t MachineBranchProbabilityInfo:: -getEdgeWeight(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const { +BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( + const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { // This is a linear search. Try to use the const_succ_iterator version when // possible. - return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst)); + return getEdgeProbability(Src, + std::find(Src->succ_begin(), Src->succ_end(), Dst)); } bool MachineBranchProbabilityInfo::isEdgeHot(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% - // FIXME: Compare against a static "hot" BranchProbability. 
- return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); + static BranchProbability HotProb(4, 5); + return getEdgeProbability(Src, Dst) > HotProb; } MachineBasicBlock * MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { - uint32_t MaxWeight = 0; + auto MaxProb = BranchProbability::getZero(); MachineBasicBlock *MaxSucc = nullptr; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, I); - if (Weight > MaxWeight) { - MaxWeight = Weight; + auto Prob = getEdgeProbability(MBB, I); + if (Prob > MaxProb) { + MaxProb = Prob; MaxSucc = *I; } } - if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5)) + static BranchProbability HotProb(4, 5); + if (getEdgeProbability(MBB, MaxSucc) >= HotProb) return MaxSucc; return nullptr; } -BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( - const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { - uint32_t Scale = 1; - uint32_t D = getSumForBlock(Src, Scale); - uint32_t N = getEdgeWeight(Src, Dst) / Scale; - - return BranchProbability(N, D); -} - raw_ostream &MachineBranchProbabilityInfo::printEdgeProbability( raw_ostream &OS, const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index 87aaaa0834cf..021707b7c3c7 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -57,7 +57,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired<AliasAnalysis>(); + AU.addRequired<AAResultsWrapperPass>(); AU.addPreservedID(MachineLoopInfoID); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); @@ -111,7 +111,7 @@ char &llvm::MachineCSEID = MachineCSE::ID; INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) @@ -714,7 +714,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); DT = &getAnalysis<MachineDominatorTree>(); LookAheadLimit = TII->getMachineCSELookAheadLimit(); return PerformCSE(DT->getRootNode()); diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp index f33d0e6a28e5..fa43c4dfa05a 100644 --- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp @@ -10,6 +10,7 @@ // The machine combiner pass uses machine trace metrics to ensure the combined // instructions does not lengthen the critical path or the resource depth. 
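The MachineBranchProbabilityInfo changes above drop raw 32-bit edge weights in favour of normalized BranchProbability values, so callers compare fractions directly instead of summing and rescaling weights. A minimal sketch of what a caller looks like after the change, meant to compile against the LLVM headers of this revision rather than to be a drop-in piece of any pass; the 4/5 threshold mirrors the HotProb constant introduced above.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/Support/BranchProbability.h"
using namespace llvm;

// Returns true if the Src->Dst edge is "hot" under the same 4/5 threshold the
// analysis itself uses. Illustrative only.
static bool edgeIsHot(const MachineBranchProbabilityInfo &MBPI,
                      const MachineBasicBlock *Src,
                      const MachineBasicBlock *Dst) {
  // No getSumForBlock()/scaling dance any more: the probability returned is
  // already normalized over Src's successor list.
  return MBPI.getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
}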
//===----------------------------------------------------------------------===// + #define DEBUG_TYPE "machine-combiner" #include "llvm/ADT/Statistic.h" @@ -68,10 +69,10 @@ private: MachineTraceMetrics::Trace BlockTrace); bool improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, - MachineTraceMetrics::Trace BlockTrace, - SmallVectorImpl<MachineInstr *> &InsInstrs, - DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, - bool NewCodeHasLessInsts); + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl<MachineInstr *> &InsInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, + MachineCombinerPattern Pattern); bool preservesResourceLen(MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl<MachineInstr *> &InsInstrs, @@ -122,9 +123,9 @@ unsigned MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, MachineTraceMetrics::Trace BlockTrace) { - SmallVector<unsigned, 16> InstrDepth; - assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); // For each instruction in the new sequence compute the depth based on the // operands. Use the trace information when possible. For new operands which @@ -180,8 +181,8 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, /// \returns Latency of \p NewRoot unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, MachineTraceMetrics::Trace BlockTrace) { - - assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); // Check each definition in NewRoot and compute the latency unsigned NewRootLatency = 0; @@ -202,62 +203,86 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO, UseMO->findRegisterUseOperandIdx(MO.getReg())); } else { - LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode()); + LatencyOp = TSchedModel.computeInstrLatency(NewRoot); } NewRootLatency = std::max(NewRootLatency, LatencyOp); } return NewRootLatency; } -/// True when the new instruction sequence does not lengthen the critical path -/// and the new sequence has less instructions or the new sequence improves the -/// critical path. +/// The combiner's goal may differ based on which pattern it is attempting +/// to optimize. +enum class CombinerObjective { + MustReduceDepth, // The data dependency chain must be improved. + Default // The critical path must not be lengthened. +}; + +static CombinerObjective getCombinerObjective(MachineCombinerPattern P) { + // TODO: If C++ ever gets a real enum class, make this part of the + // MachineCombinerPattern class. + switch (P) { + case MachineCombinerPattern::REASSOC_AX_BY: + case MachineCombinerPattern::REASSOC_AX_YB: + case MachineCombinerPattern::REASSOC_XA_BY: + case MachineCombinerPattern::REASSOC_XA_YB: + return CombinerObjective::MustReduceDepth; + default: + return CombinerObjective::Default; + } +} + /// The DAGCombine code sequence ends in MI (Machine Instruction) Root. /// The new code sequence ends in MI NewRoot. A necessary condition for the new /// sequence to replace the old sequence is that it cannot lengthen the critical -/// path. This is decided by the formula: -/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)). 
-/// If the new sequence has an equal length critical path but does not reduce -/// the number of instructions (NewCodeHasLessInsts is false), then it is not -/// considered an improvement. The slack is the number of cycles Root can be -/// delayed before the critical patch becomes longer. +/// path. The definition of "improve" may be restricted by specifying that the +/// new path improves the data dependency chain (MustReduceDepth). bool MachineCombiner::improvesCriticalPathLen( MachineBasicBlock *MBB, MachineInstr *Root, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl<MachineInstr *> &InsInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, - bool NewCodeHasLessInsts) { - - assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + MachineCombinerPattern Pattern) { + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); // NewRoot is the last instruction in the \p InsInstrs vector. - // Get depth and latency of NewRoot. unsigned NewRootIdx = InsInstrs.size() - 1; MachineInstr *NewRoot = InsInstrs[NewRootIdx]; - unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); - unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); - // Get depth, latency and slack of Root. + // Get depth and latency of NewRoot and Root. + unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth; + + DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n"; + dbgs() << " NewRootDepth: " << NewRootDepth << "\n"; + dbgs() << " RootDepth: " << RootDepth << "\n"); + + // For a transform such as reassociation, the cost equation is + // conservatively calculated so that we must improve the depth (data + // dependency cycles) in the critical path to proceed with the transform. + // Being conservative also protects against inaccuracies in the underlying + // machine trace metrics and CPU models. + if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) + return NewRootDepth < RootDepth; + + // A more flexible cost calculation for the critical path includes the slack + // of the original code sequence. This may allow the transform to proceed + // even if the instruction depths (data dependency cycles) become worse. 
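A small standalone illustration of the two combiner objectives described above, using made-up cycle counts: a reassociation pattern (MustReduceDepth) must strictly shorten the data-dependency depth, while the default objective may accept a deeper result as long as the slack of the original root absorbs the difference.

#include <cstdio>

int main() {
  // Hypothetical trace numbers for one candidate root instruction.
  unsigned NewRootDepth = 7, RootDepth = 6;          // depth got worse by one
  unsigned NewRootLatency = 2, RootLatency = 3, RootSlack = 4;

  // MustReduceDepth (reassociation patterns): 7 < 6 is false, so reject.
  bool OkDepthOnly = NewRootDepth < RootDepth;

  // Default objective: 7 + 2 = 9 <= 6 + 3 + 4 = 13, so the transform may fire.
  bool OkWithSlack =
      NewRootDepth + NewRootLatency <= RootDepth + RootLatency + RootSlack;

  std::printf("MustReduceDepth: %s, default: %s\n",
              OkDepthOnly ? "accept" : "reject",
              OkWithSlack ? "accept" : "reject");
  return 0;
}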
+ unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); unsigned RootLatency = TSchedModel.computeInstrLatency(Root); unsigned RootSlack = BlockTrace.getInstrSlack(Root); - DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n"; - dbgs() << " NewRootDepth: " << NewRootDepth - << " NewRootLatency: " << NewRootLatency << "\n"; - dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency - << " RootSlack: " << RootSlack << "\n"; - dbgs() << " NewRootDepth + NewRootLatency " + DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n"; + dbgs() << " RootLatency: " << RootLatency << "\n"; + dbgs() << " RootSlack: " << RootSlack << "\n"; + dbgs() << " NewRootDepth + NewRootLatency = " << NewRootDepth + NewRootLatency << "\n"; - dbgs() << " RootDepth + RootLatency + RootSlack " + dbgs() << " RootDepth + RootLatency + RootSlack = " << RootDepth + RootLatency + RootSlack << "\n";); unsigned NewCycleCount = NewRootDepth + NewRootLatency; unsigned OldCycleCount = RootDepth + RootLatency + RootSlack; - if (NewCodeHasLessInsts) - return NewCycleCount <= OldCycleCount; - else - return NewCycleCount < OldCycleCount; + return NewCycleCount <= OldCycleCount; } /// helper routine to convert instructions into SC @@ -271,11 +296,14 @@ void MachineCombiner::instr2instrSC( InstrsSC.push_back(SC); } } + /// True when the new instructions do not increase resource length bool MachineCombiner::preservesResourceLen( MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs) { + if (!TSchedModel.hasInstrSchedModel()) + return true; // Compute current resource length @@ -310,7 +338,7 @@ bool MachineCombiner::preservesResourceLen( bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) { if (OptSize && (NewSize < OldSize)) return true; - if (!TSchedModel.hasInstrSchedModel()) + if (!TSchedModel.hasInstrSchedModelOrItineraries()) return true; return false; } @@ -332,7 +360,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { auto &MI = *BlockIter++; DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";); - SmallVector<MachineCombinerPattern::MC_PATTERN, 16> Patterns; + SmallVector<MachineCombinerPattern, 16> Patterns; // The motivating example is: // // MUL Other MUL_op1 MUL_op2 Other @@ -358,54 +386,55 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { // mostly one pattern, and getMachineCombinerPatterns() can order patterns // based on an internal cost heuristic. 
- if (TII->getMachineCombinerPatterns(MI, Patterns)) { - for (auto P : Patterns) { - SmallVector<MachineInstr *, 16> InsInstrs; - SmallVector<MachineInstr *, 16> DelInstrs; - DenseMap<unsigned, unsigned> InstrIdxForVirtReg; - if (!MinInstr) - MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); - MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB); + if (!TII->getMachineCombinerPatterns(MI, Patterns)) + continue; + + for (auto P : Patterns) { + SmallVector<MachineInstr *, 16> InsInstrs; + SmallVector<MachineInstr *, 16> DelInstrs; + DenseMap<unsigned, unsigned> InstrIdxForVirtReg; + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB); + Traces->verifyAnalysis(); + TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, + InstrIdxForVirtReg); + unsigned NewInstCount = InsInstrs.size(); + unsigned OldInstCount = DelInstrs.size(); + // Found pattern, but did not generate alternative sequence. + // This can happen e.g. when an immediate could not be materialized + // in a single instruction. + if (!NewInstCount) + continue; + + // Substitute when we optimize for codesize and the new sequence has + // fewer instructions OR + // the new sequence neither lengthens the critical path nor increases + // resource pressure. + if (doSubstitute(NewInstCount, OldInstCount) || + (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, + InstrIdxForVirtReg, P) && + preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { + for (auto *InstrPtr : InsInstrs) + MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr); + for (auto *InstrPtr : DelInstrs) + InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval(); + + Changed = true; + ++NumInstCombined; + + Traces->invalidate(MBB); Traces->verifyAnalysis(); - TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, - InstrIdxForVirtReg); - unsigned NewInstCount = InsInstrs.size(); - unsigned OldInstCount = DelInstrs.size(); - // Found pattern, but did not generate alternative sequence. - // This can happen e.g. when an immediate could not be materialized - // in a single instruction. - if (!NewInstCount) - continue; - // Substitute when we optimize for codesize and the new sequence has - // fewer instructions OR - // the new sequence neither lengthens the critical path nor increases - // resource pressure. - if (doSubstitute(NewInstCount, OldInstCount) || - (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, - InstrIdxForVirtReg, - NewInstCount < OldInstCount) && - preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { - for (auto *InstrPtr : InsInstrs) - MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr); - for (auto *InstrPtr : DelInstrs) - InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval(); - - Changed = true; - ++NumInstCombined; - - Traces->invalidate(MBB); - Traces->verifyAnalysis(); - // Eagerly stop after the first pattern fires. - break; - } else { - // Cleanup instructions of the alternative code sequence. There is no - // use for them. - MachineFunction *MF = MBB->getParent(); - for (auto *InstrPtr : InsInstrs) - MF->DeleteMachineInstr(InstrPtr); - } - InstrIdxForVirtReg.clear(); + // Eagerly stop after the first pattern fires. + break; + } else { + // Cleanup instructions of the alternative code sequence. There is no + // use for them. 
+ MachineFunction *MF = MBB->getParent(); + for (auto *InstrPtr : InsInstrs) + MF->DeleteMachineInstr(InstrPtr); } + InstrIdxForVirtReg.clear(); } } @@ -420,9 +449,8 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { TSchedModel.init(SchedModel, &STI, TII); MRI = &MF.getRegInfo(); Traces = &getAnalysis<MachineTraceMetrics>(); - MinInstr = 0; - - OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + MinInstr = nullptr; + OptSize = MF.getFunction()->optForSize(); DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); if (!TII->useMachineCombiner()) { diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp index 9856e70edaef..ca4bb1c6ad49 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionInitializer.h" @@ -26,6 +27,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" @@ -44,6 +47,11 @@ using namespace llvm; #define DEBUG_TYPE "codegen" +static cl::opt<unsigned> + AlignAllFunctions("align-all-functions", + cl::desc("Force the alignment of all functions."), + cl::init(0), cl::Hidden); + void MachineFunctionInitializer::anchor() {} //===----------------------------------------------------------------------===// @@ -79,12 +87,27 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, Alignment = STI->getTargetLowering()->getMinFunctionAlignment(); // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. + // FIXME: Use Function::optForSize(). if (!Fn->hasFnAttribute(Attribute::OptimizeForSize)) Alignment = std::max(Alignment, STI->getTargetLowering()->getPrefFunctionAlignment()); + if (AlignAllFunctions) + Alignment = AlignAllFunctions; + FunctionNumber = FunctionNum; JumpTableInfo = nullptr; + + if (isFuncletEHPersonality(classifyEHPersonality( + F->hasPersonalityFn() ? F->getPersonalityFn() : nullptr))) { + WinEHInfo = new (Allocator) WinEHFuncInfo(); + } + + assert(TM.isCompatibleDataLayout(getDataLayout()) && + "Can't create a MachineFunction using a Module with a " + "Target-incompatible DataLayout attached\n"); + + PSVManager = llvm::make_unique<PseudoSourceValueManager>(); } MachineFunction::~MachineFunction() { @@ -117,6 +140,11 @@ MachineFunction::~MachineFunction() { JumpTableInfo->~MachineJumpTableInfo(); Allocator.Deallocate(JumpTableInfo); } + + if (WinEHInfo) { + WinEHInfo->~WinEHFuncInfo(); + Allocator.Deallocate(WinEHInfo); + } } const DataLayout &MachineFunction::getDataLayout() const { @@ -149,7 +177,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { if (MBB == nullptr) MBBI = begin(); else - MBBI = MBB; + MBBI = MBB->getIterator(); // Figure out the block number this should have. 
unsigned BlockNo = 0; @@ -169,7 +197,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { if (MBBNumbering[BlockNo]) MBBNumbering[BlockNo]->setNumber(-1); - MBBNumbering[BlockNo] = MBBI; + MBBNumbering[BlockNo] = &*MBBI; MBBI->setNumber(BlockNo); } } @@ -322,6 +350,13 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, return std::make_pair(Result, Result + Num); } +const char *MachineFunction::createExternalSymbolName(StringRef Name) { + char *Dest = Allocator.Allocate<char>(Name.size() + 1); + std::copy(Name.begin(), Name.end(), Dest); + Dest[Name.size()] = 0; + return Dest; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineFunction::dump() const { print(dbgs()); @@ -593,10 +628,9 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const { BV.set(*CSR); // Saved CSRs are not pristine. - const std::vector<CalleeSavedInfo> &CSI = getCalleeSavedInfo(); - for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) - BV.reset(I->getReg()); + for (auto &I : getCalleeSavedInfo()) + for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S) + BV.reset(*S); return BV; } @@ -801,42 +835,26 @@ Type *MachineConstantPoolEntry::getType() const { return Val.ConstVal->getType(); } - -unsigned MachineConstantPoolEntry::getRelocationInfo() const { +bool MachineConstantPoolEntry::needsRelocation() const { if (isMachineConstantPoolEntry()) - return Val.MachineCPVal->getRelocationInfo(); - return Val.ConstVal->getRelocationInfo(); + return true; + return Val.ConstVal->needsRelocation(); } SectionKind MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { - SectionKind Kind; - switch (getRelocationInfo()) { + if (needsRelocation()) + return SectionKind::getReadOnlyWithRel(); + switch (DL->getTypeAllocSize(getType())) { + case 4: + return SectionKind::getMergeableConst4(); + case 8: + return SectionKind::getMergeableConst8(); + case 16: + return SectionKind::getMergeableConst16(); default: - llvm_unreachable("Unknown section kind"); - case Constant::GlobalRelocations: - Kind = SectionKind::getReadOnlyWithRel(); - break; - case Constant::LocalRelocation: - Kind = SectionKind::getReadOnlyWithRelLocal(); - break; - case Constant::NoRelocation: - switch (DL->getTypeAllocSize(getType())) { - case 4: - Kind = SectionKind::getMergeableConst4(); - break; - case 8: - Kind = SectionKind::getMergeableConst8(); - break; - case 16: - Kind = SectionKind::getMergeableConst16(); - break; - default: - Kind = SectionKind::getReadOnly(); - break; - } + return SectionKind::getReadOnly(); } - return Kind; } MachineConstantPool::~MachineConstantPool() { diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp index aaf06a70da74..05463fc6a1ef 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -13,11 +13,14 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/DominanceFrontier.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/StackProtector.h" @@ -49,13 
+52,16 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { // passes explicitly. This does not include setPreservesCFG, // because CodeGen overloads that to mean preserving the MachineBasicBlock // CFG in addition to the LLVM IR CFG. - AU.addPreserved<AliasAnalysis>(); + AU.addPreserved<BasicAAWrapperPass>(); AU.addPreserved<DominanceFrontier>(); AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); AU.addPreserved<IVUsers>(); AU.addPreserved<LoopInfoWrapperPass>(); AU.addPreserved<MemoryDependenceAnalysis>(); - AU.addPreserved<ScalarEvolution>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addPreserved<SCEVAAWrapperPass>(); AU.addPreserved<StackProtector>(); FunctionPass::getAnalysisUsage(AU); diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index fdc4226ad926..1eb2edcd7cec 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/Value.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -43,6 +44,11 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +static cl::opt<bool> PrintWholeRegMask( + "print-whole-regmask", + cl::desc("Print the full contents of regmask operands in IR dumps"), + cl::init(true), cl::Hidden); + //===----------------------------------------------------------------------===// // MachineOperand Implementation //===----------------------------------------------------------------------===// @@ -407,9 +413,26 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; - case MachineOperand::MO_RegisterMask: - OS << "<regmask>"; + case MachineOperand::MO_RegisterMask: { + unsigned NumRegsInMask = 0; + unsigned NumRegsEmitted = 0; + OS << "<regmask"; + for (unsigned i = 0; i < TRI->getNumRegs(); ++i) { + unsigned MaskWord = i / 32; + unsigned MaskBit = i % 32; + if (getRegMask()[MaskWord] & (1 << MaskBit)) { + if (PrintWholeRegMask || NumRegsEmitted <= 10) { + OS << " " << PrintReg(i, TRI); + NumRegsEmitted++; + } + NumRegsInMask++; + } + } + if (NumRegsEmitted != NumRegsInMask) + OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more..."; + OS << ">"; break; + } case MachineOperand::MO_RegisterLiveOut: OS << "<regliveout>"; break; @@ -443,26 +466,28 @@ unsigned MachinePointerInfo::getAddrSpace() const { /// getConstantPool - Return a MachinePointerInfo record that refers to the /// constant pool. -MachinePointerInfo MachinePointerInfo::getConstantPool() { - return MachinePointerInfo(PseudoSourceValue::getConstantPool()); +MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getConstantPool()); } /// getFixedStack - Return a MachinePointerInfo record that refers to the /// the specified FrameIndex. 
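The register-mask printing added above in MachineOperand::print walks the mask one physical register at a time: register i is preserved when bit (i % 32) of word (i / 32) is set. A self-contained sketch of the same decoding over a toy mask array; the register count and mask contents are invented purely for the example.

#include <cstdint>
#include <cstdio>

int main() {
  // Toy regmask covering 40 "registers": regs 1, 2 and 33 are preserved.
  const uint32_t Mask[] = {(1u << 1) | (1u << 2), (1u << (33 % 32))};
  const unsigned NumRegs = 40;

  for (unsigned i = 0; i < NumRegs; ++i) {
    unsigned MaskWord = i / 32;
    unsigned MaskBit = i % 32;
    if (Mask[MaskWord] & (1u << MaskBit))
      std::printf("reg %u is preserved by the mask\n", i);
  }
  return 0;
}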
-MachinePointerInfo MachinePointerInfo::getFixedStack(int FI, int64_t offset) { - return MachinePointerInfo(PseudoSourceValue::getFixedStack(FI), offset); +MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF, + int FI, int64_t Offset) { + return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset); } -MachinePointerInfo MachinePointerInfo::getJumpTable() { - return MachinePointerInfo(PseudoSourceValue::getJumpTable()); +MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getJumpTable()); } -MachinePointerInfo MachinePointerInfo::getGOT() { - return MachinePointerInfo(PseudoSourceValue::getGOT()); +MachinePointerInfo MachinePointerInfo::getGOT(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getGOT()); } -MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) { - return MachinePointerInfo(PseudoSourceValue::getStack(), Offset); +MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF, + int64_t Offset) { + return MachinePointerInfo(MF.getPSVManager().getStack(), Offset); } MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, @@ -606,10 +631,12 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { if (MCID->ImplicitDefs) - for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs) + for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; + ++ImpDefs) addOperand(MF, MachineOperand::CreateReg(*ImpDefs, true, true)); if (MCID->ImplicitUses) - for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses) + for (const MCPhysReg *ImpUses = MCID->getImplicitUses(); *ImpUses; + ++ImpUses) addOperand(MF, MachineOperand::CreateReg(*ImpUses, false, true)); } @@ -841,7 +868,7 @@ void MachineInstr::addMemOperand(MachineFunction &MF, bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const { assert(!isBundledWithPred() && "Must be called on bundle header"); - for (MachineBasicBlock::const_instr_iterator MII = this;; ++MII) { + for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) { if (MII->getDesc().getFlags() & Mask) { if (Type == AnyInBundle) return true; @@ -865,13 +892,13 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, if (isBundle()) { // Both instructions are bundles, compare MIs inside the bundle. 
- MachineBasicBlock::const_instr_iterator I1 = *this; + MachineBasicBlock::const_instr_iterator I1 = getIterator(); MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end(); - MachineBasicBlock::const_instr_iterator I2 = *Other; + MachineBasicBlock::const_instr_iterator I2 = Other->getIterator(); MachineBasicBlock::const_instr_iterator E2= Other->getParent()->instr_end(); while (++I1 != E1 && I1->isInsideBundle()) { ++I2; - if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(I2, Check)) + if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(&*I2, Check)) return false; } } @@ -976,7 +1003,7 @@ unsigned MachineInstr::getNumExplicitOperands() const { void MachineInstr::bundleWithPred() { assert(!isBundledWithPred() && "MI is already bundled with its predecessor"); setFlag(BundledPred); - MachineBasicBlock::instr_iterator Pred = this; + MachineBasicBlock::instr_iterator Pred = getIterator(); --Pred; assert(!Pred->isBundledWithSucc() && "Inconsistent bundle flags"); Pred->setFlag(BundledSucc); @@ -985,7 +1012,7 @@ void MachineInstr::bundleWithPred() { void MachineInstr::bundleWithSucc() { assert(!isBundledWithSucc() && "MI is already bundled with its successor"); setFlag(BundledSucc); - MachineBasicBlock::instr_iterator Succ = this; + MachineBasicBlock::instr_iterator Succ = getIterator(); ++Succ; assert(!Succ->isBundledWithPred() && "Inconsistent bundle flags"); Succ->setFlag(BundledPred); @@ -994,7 +1021,7 @@ void MachineInstr::bundleWithSucc() { void MachineInstr::unbundleFromPred() { assert(isBundledWithPred() && "MI isn't bundled with its predecessor"); clearFlag(BundledPred); - MachineBasicBlock::instr_iterator Pred = this; + MachineBasicBlock::instr_iterator Pred = getIterator(); --Pred; assert(Pred->isBundledWithSucc() && "Inconsistent bundle flags"); Pred->clearFlag(BundledSucc); @@ -1003,7 +1030,7 @@ void MachineInstr::unbundleFromPred() { void MachineInstr::unbundleFromSucc() { assert(isBundledWithSucc() && "MI isn't bundled with its successor"); clearFlag(BundledSucc); - MachineBasicBlock::instr_iterator Succ = this; + MachineBasicBlock::instr_iterator Succ = getIterator(); ++Succ; assert(Succ->isBundledWithPred() && "Inconsistent bundle flags"); Succ->clearFlag(BundledPred); @@ -1139,7 +1166,7 @@ const TargetRegisterClass *MachineInstr::getRegClassConstraintEffect( /// Return the number of instructions inside the MI bundle, not counting the /// header instruction. unsigned MachineInstr::getBundleSize() const { - MachineBasicBlock::const_instr_iterator I = this; + MachineBasicBlock::const_instr_iterator I = getIterator(); unsigned Size = 0; while (I->isBundledWithSucc()) ++Size, ++I; @@ -1501,6 +1528,10 @@ bool MachineInstr::hasUnmodeledSideEffects() const { return false; } +bool MachineInstr::isLoadFoldBarrier() const { + return mayStore() || isCall() || hasUnmodeledSideEffects(); +} + /// allDefsAreDead - Return true if all the defs of this instruction are dead. 
/// bool MachineInstr::allDefsAreDead() const { @@ -1615,7 +1646,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, FirstOp = false; } - for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); @@ -1706,13 +1736,16 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, } bool HaveSemi = false; - const unsigned PrintableFlags = FrameSetup; + const unsigned PrintableFlags = FrameSetup | FrameDestroy; if (Flags & PrintableFlags) { if (!HaveSemi) OS << ";"; HaveSemi = true; OS << " flags: "; if (Flags & FrameSetup) OS << "FrameSetup"; + + if (Flags & FrameDestroy) + OS << "FrameDestroy"; } if (!memoperands_empty()) { @@ -1755,7 +1788,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, DebugLoc InlinedAtDL(InlinedAt); if (InlinedAtDL && MF) { OS << " inlined @[ "; - InlinedAtDL.print(OS); + InlinedAtDL.print(OS); OS << " ]"; } } @@ -1902,11 +1935,11 @@ void MachineInstr::clearRegisterDeads(unsigned Reg) { } } -void MachineInstr::addRegisterDefReadUndef(unsigned Reg) { +void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) { for (MachineOperand &MO : operands()) { if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0) continue; - MO.setIsUndef(); + MO.setIsUndef(IsUndef); } } diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp index cd820ee1ac52..3eaf4c5dea0f 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -293,15 +293,17 @@ MachineOperandIteratorBase::PhysRegInfo MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, const TargetRegisterInfo *TRI) { bool AllDefsDead = true; - PhysRegInfo PRI = {false, false, false, false, false, false}; + PhysRegInfo PRI = {false, false, false, false, false, false, false}; assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "analyzePhysReg not given a physical register!"); for (; isValid(); ++*this) { MachineOperand &MO = deref(); - if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) - PRI.Clobbers = true; // Regmask clobbers Reg. + if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) { + PRI.Clobbered = true; + continue; + } if (!MO.isReg()) continue; @@ -310,33 +312,28 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg)) continue; - bool IsRegOrSuperReg = MOReg == Reg || TRI->isSubRegister(MOReg, Reg); - bool IsRegOrOverlapping = MOReg == Reg || TRI->regsOverlap(MOReg, Reg); - - if (IsRegOrSuperReg && MO.readsReg()) { - // Reg or a super-reg is read, and perhaps killed also. - PRI.Reads = true; - PRI.Kills = MO.isKill(); - } - - if (IsRegOrOverlapping && MO.readsReg()) { - PRI.ReadsOverlap = true;// Reg or an overlapping register is read. - } - - if (!MO.isDef()) + if (!TRI->regsOverlap(MOReg, Reg)) continue; - if (IsRegOrSuperReg) { - PRI.Defines = true; // Reg or a super-register is defined. + bool Covered = TRI->isSuperRegisterEq(MOReg, Reg); + if (MO.readsReg()) { + PRI.Read = true; + if (Covered) { + PRI.FullyRead = true; + if (MO.isKill()) + PRI.Killed = true; + } + } else if (MO.isDef()) { + PRI.Defined = true; + if (Covered) + PRI.FullyDefined = true; if (!MO.isDead()) AllDefsDead = false; } - if (IsRegOrOverlapping) - PRI.Clobbers = true; // Reg or an overlapping reg is defined. } - if (AllDefsDead && PRI.Defines) - PRI.DefinesDead = true; // Reg or super-register was defined and was dead. 
+ if (AllDefsDead && PRI.FullyDefined) + PRI.DeadDef = true; return PRI; } diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index e9ea5ed9648c..a8368e9c80d6 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -138,7 +138,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineLoopInfo>(); AU.addRequired<MachineDominatorTree>(); - AU.addRequired<AliasAnalysis>(); + AU.addRequired<AAResultsWrapperPass>(); AU.addPreserved<MachineLoopInfo>(); AU.addPreserved<MachineDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -153,7 +153,7 @@ namespace { } private: - /// CandidateInfo - Keep track of information about hoisting candidates. + /// Keep track of information about hoisting candidates. struct CandidateInfo { MachineInstr *MI; unsigned Def; @@ -162,149 +162,76 @@ namespace { : MI(mi), Def(def), FI(fi) {} }; - /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop - /// invariants out to the preheader. void HoistRegionPostRA(); - /// HoistPostRA - When an instruction is found to only use loop invariant - /// operands that is safe to hoist, this instruction is called to do the - /// dirty work. void HoistPostRA(MachineInstr *MI, unsigned Def); - /// ProcessMI - Examine the instruction for potentai LICM candidate. Also - /// gather register def and frame object update information. - void ProcessMI(MachineInstr *MI, - BitVector &PhysRegDefs, - BitVector &PhysRegClobbers, - SmallSet<int, 32> &StoredFIs, + void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, + BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, SmallVectorImpl<CandidateInfo> &Candidates); - /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the - /// current loop. void AddToLiveIns(unsigned Reg); - /// IsLICMCandidate - Returns true if the instruction may be a suitable - /// candidate for LICM. e.g. If the instruction is a call, then it's - /// obviously not safe to hoist it. bool IsLICMCandidate(MachineInstr &I); - /// IsLoopInvariantInst - Returns true if the instruction is loop - /// invariant. I.e., all virtual register operands are defined outside of - /// the loop, physical registers aren't accessed (explicitly or implicitly), - /// and the instruction is hoistable. - /// bool IsLoopInvariantInst(MachineInstr &I); - /// HasLoopPHIUse - Return true if the specified instruction is used by any - /// phi node in the current loop. bool HasLoopPHIUse(const MachineInstr *MI) const; - /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' - /// and an use in the current loop, return true if the target considered - /// it 'high'. bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg) const; bool IsCheapInstruction(MachineInstr &MI) const; - /// CanCauseHighRegPressure - Visit BBs from header to current BB, - /// check if hoisting an instruction of the given cost matrix can cause high - /// register pressure. bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost, bool Cheap); - /// UpdateBackTraceRegPressure - Traverse the back trace from header to - /// the current block and update their register pressures to reflect the - /// effect of hoisting MI from the current block to the preheader. void UpdateBackTraceRegPressure(const MachineInstr *MI); - /// IsProfitableToHoist - Return true if it is potentially profitable to - /// hoist the given loop invariant. 
bool IsProfitableToHoist(MachineInstr &MI); - /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. - /// If not then a load from this mbb may not be safe to hoist. bool IsGuaranteedToExecute(MachineBasicBlock *BB); void EnterScope(MachineBasicBlock *MBB); void ExitScope(MachineBasicBlock *MBB); - /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given - /// dominator tree node if its a leaf or all of its children are done. Walk - /// up the dominator tree to destroy ancestors which are now done. - void ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, - DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); - - /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all - /// blocks dominated by the specified header block, and that are in the - /// current loop) in depth first order w.r.t the DominatorTree. This allows - /// us to visit definitions before uses, allowing us to hoist a loop body in - /// one pass without iteration. - /// + void ExitScopeIfDone( + MachineDomTreeNode *Node, + DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren, + DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap); + void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); + void HoistRegion(MachineDomTreeNode *N, bool IsHeader); - /// SinkIntoLoop - Sink instructions into loops if profitable. This - /// especially tries to prevent register spills caused by register pressure - /// if there is little to no overhead moving instructions into loops. void SinkIntoLoop(); - /// InitRegPressure - Find all virtual register references that are liveout - /// of the preheader to initialize the starting "register pressure". Note - /// this does not count live through (livein but not used) registers. void InitRegPressure(MachineBasicBlock *BB); - /// calcRegisterCost - Calculate the additional register pressure that the - /// registers used in MI cause. - /// - /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to - /// figure out which usages are live-ins. - /// FIXME: Figure out a way to consider 'RegSeen' from all code paths. DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, bool ConsiderUnseenAsDef); - /// UpdateRegPressure - Update estimate of register pressure after the - /// specified instruction. void UpdateRegPressure(const MachineInstr *MI, bool ConsiderUnseenAsDef = false); - /// ExtractHoistableLoad - Unfold a load from the given machineinstr if - /// the load itself could be hoisted. Return the unfolded and hoistable - /// load, or null if the load couldn't be unfolded or if it wouldn't - /// be hoistable. MachineInstr *ExtractHoistableLoad(MachineInstr *MI); - /// LookForDuplicate - Find an instruction amount PrevMIs that is a - /// duplicate of MI. Return this instruction if it's found. - const MachineInstr *LookForDuplicate(const MachineInstr *MI, - std::vector<const MachineInstr*> &PrevMIs); + const MachineInstr * + LookForDuplicate(const MachineInstr *MI, + std::vector<const MachineInstr *> &PrevMIs); - /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on - /// the preheader that compute the same value. If it's found, do a RAU on - /// with the definition of the existing instruction rather than hoisting - /// the instruction to the preheader. 
- bool EliminateCSE(MachineInstr *MI, - DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI); + bool EliminateCSE( + MachineInstr *MI, + DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI); - /// MayCSE - Return true if the given instruction will be CSE'd if it's - /// hoisted out of the loop. bool MayCSE(MachineInstr *MI); - /// Hoist - When an instruction is found to only use loop invariant operands - /// that is safe to hoist, this instruction is called to do the dirty work. - /// It returns true if the instruction is hoisted. bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); - /// InitCSEMap - Initialize the CSE map with instructions that are in the - /// current loop preheader that may become duplicates of instructions that - /// are hoisted out of the loop. void InitCSEMap(MachineBasicBlock *BB); - /// getCurPreheader - Get the preheader for the current loop, splitting - /// a critical edge if needed. MachineBasicBlock *getCurPreheader(); }; } // end anonymous namespace @@ -315,12 +242,11 @@ INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", "Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineLICM, "machinelicm", "Machine Loop Invariant Code Motion", false, false) -/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most -/// loop that has a unique predecessor. +/// Test if the given loop is the outer-most loop that has a unique predecessor. static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { // Check whether this loop even has a unique predecessor. if (!CurLoop->getLoopPredecessor()) @@ -367,7 +293,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { // Get our Loop information... MLI = &getAnalysis<MachineLoopInfo>(); DT = &getAnalysis<MachineDominatorTree>(); - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); while (!Worklist.empty()) { @@ -402,9 +328,12 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { return Changed; } -/// InstructionStoresToFI - Return true if instruction stores to the -/// specified frame. +/// Return true if instruction stores to the specified frame. static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { + // If we lost memory operands, conservatively assume that the instruction + // writes to all slots. + if (MI->memoperands_empty()) + return true; for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), oe = MI->memoperands_end(); o != oe; ++o) { if (!(*o)->isStore() || !(*o)->getPseudoValue()) @@ -418,7 +347,7 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { return false; } -/// ProcessMI - Examine the instruction for potentai LICM candidate. Also +/// Examine the instruction for potentai LICM candidate. Also /// gather register def and frame object update information. void MachineLICM::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, @@ -506,8 +435,8 @@ void MachineLICM::ProcessMI(MachineInstr *MI, } } -/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop -/// invariants out to the preheader. +/// Walk the specified region of the CFG and hoist loop invariants out to the +/// preheader. 
void MachineLICM::HoistRegionPostRA() { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) @@ -529,15 +458,13 @@ void MachineLICM::HoistRegionPostRA() { // If the header of the loop containing this basic block is a landing pad, // then don't try to hoist instructions out of this loop. const MachineLoop *ML = MLI->getLoopFor(BB); - if (ML && ML->getHeader()->isLandingPad()) continue; + if (ML && ML->getHeader()->isEHPad()) continue; // Conservatively treat live-in's as an external def. // FIXME: That means a reload that're reused in successor block(s) will not // be LICM'ed. - for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), - E = BB->livein_end(); I != E; ++I) { - unsigned Reg = *I; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + for (const auto &LI : BB->liveins()) { + for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) PhysRegDefs.set(*AI); } @@ -601,8 +528,8 @@ void MachineLICM::HoistRegionPostRA() { } } -/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current -/// loop, and make sure it is not killed by any instructions in the loop. +/// Add register 'Reg' to the livein sets of BBs in the current loop, and make +/// sure it is not killed by any instructions in the loop. void MachineLICM::AddToLiveIns(unsigned Reg) { const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { @@ -622,9 +549,8 @@ void MachineLICM::AddToLiveIns(unsigned Reg) { } } -/// HoistPostRA - When an instruction is found to only use loop invariant -/// operands that is safe to hoist, this instruction is called to do the -/// dirty work. +/// When an instruction is found to only use loop invariant operands that is +/// safe to hoist, this instruction is called to do the dirty work. void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { MachineBasicBlock *Preheader = getCurPreheader(); @@ -646,8 +572,8 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { Changed = true; } -// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. -// If not then a load from this mbb may not be safe to hoist. +/// Check if this mbb is guaranteed to execute. If not then a load from this mbb +/// may not be safe to hoist. bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { if (SpeculationState != SpeculateUnknown) return SpeculationState == SpeculateFalse; @@ -679,9 +605,9 @@ void MachineLICM::ExitScope(MachineBasicBlock *MBB) { BackTrace.pop_back(); } -/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given -/// dominator tree node if its a leaf or all of its children are done. Walk -/// up the dominator tree to destroy ancestors which are now done. +/// Destroy scope for the MBB that corresponds to the given dominator tree node +/// if its a leaf or all of its children are done. Walk up the dominator tree to +/// destroy ancestors which are now done. void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { @@ -701,11 +627,10 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, } } -/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all -/// blocks dominated by the specified header block, and that are in the -/// current loop) in depth first order w.r.t the DominatorTree. 
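The live-in scan in HoistRegionPostRA above moves from the old livein_begin()/livein_end() iterators to the liveins() range, whose elements carry a PhysReg field. A hedged sketch of the new idiom, written against the headers of this revision; markLiveInsAsDefs is an invented helper, and TRI and PhysRegDefs stand in for the members the pass already owns.

#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Conservatively treat every live-in register, and everything aliasing it,
// as defined outside the loop, as the hunk above does.
static void markLiveInsAsDefs(const MachineBasicBlock &BB,
                              const TargetRegisterInfo &TRI,
                              BitVector &PhysRegDefs) {
  for (const auto &LI : BB.liveins())
    for (MCRegAliasIterator AI(LI.PhysReg, &TRI, /*IncludeSelf=*/true);
         AI.isValid(); ++AI)
      PhysRegDefs.set(*AI);
}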
This allows -/// us to visit definitions before uses, allowing us to hoist a loop body in -/// one pass without iteration. +/// Walk the specified loop in the CFG (defined by all blocks dominated by the +/// specified header block, and that are in the current loop) in depth first +/// order w.r.t the DominatorTree. This allows us to visit definitions before +/// uses, allowing us to hoist a loop body in one pass without iteration. /// void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { MachineBasicBlock *Preheader = getCurPreheader(); @@ -727,7 +652,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { // If the header of the loop containing this basic block is a landing pad, // then don't try to hoist instructions out of this loop. const MachineLoop *ML = MLI->getLoopFor(BB); - if (ML && ML->getHeader()->isLandingPad()) + if (ML && ML->getHeader()->isEHPad()) continue; // If this subregion is not in the top level loop at all, exit. @@ -786,6 +711,9 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { } } +/// Sink instructions into loops if profitable. This especially tries to prevent +/// register spills caused by register pressure if there is little to no +/// overhead moving instructions into loops. void MachineLICM::SinkIntoLoop() { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) @@ -796,8 +724,8 @@ void MachineLICM::SinkIntoLoop() { I != Preheader->instr_end(); ++I) { // We need to ensure that we can safely move this instruction into the loop. // As such, it must not have side-effects, e.g. such as a call has. - if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I)) - Candidates.push_back(I); + if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) + Candidates.push_back(&*I); } for (MachineInstr *I : Candidates) { @@ -837,9 +765,9 @@ static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); } -/// InitRegPressure - Find all virtual register references that are liveout of -/// the preheader to initialize the starting "register pressure". Note this -/// does not count live through (livein but not used) registers. +/// Find all virtual register references that are liveout of the preheader to +/// initialize the starting "register pressure". Note this does not count live +/// through (livein but not used) registers. void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { std::fill(RegPressure.begin(), RegPressure.end(), 0); @@ -858,8 +786,7 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true); } -/// UpdateRegPressure - Update estimate of register pressure after the -/// specified instruction. +/// Update estimate of register pressure after the specified instruction. void MachineLICM::UpdateRegPressure(const MachineInstr *MI, bool ConsiderUnseenAsDef) { auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef); @@ -872,6 +799,12 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI, } } +/// Calculate the additional register pressure that the registers used in MI +/// cause. +/// +/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to +/// figure out which usages are live-ins. +/// FIXME: Figure out a way to consider 'RegSeen' from all code paths. 
DenseMap<unsigned, int> MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, bool ConsiderUnseenAsDef) { @@ -915,23 +848,28 @@ MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, return Cost; } -/// isLoadFromGOTOrConstantPool - Return true if this machine instruction -/// loads from global offset table or constant pool. -static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { +/// Return true if this machine instruction loads from global offset table or +/// constant pool. +static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { assert (MI.mayLoad() && "Expected MI that loads!"); + + // If we lost memory operands, conservatively assume that the instruction + // reads from everything.. + if (MI.memoperands_empty()) + return true; + for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), E = MI.memoperands_end(); I != E; ++I) { if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) { - if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) + if (PSV->isGOT() || PSV->isConstantPool()) return true; } } return false; } -/// IsLICMCandidate - Returns true if the instruction may be a suitable -/// candidate for LICM. e.g. If the instruction is a call, then it's obviously -/// not safe to hoist it. +/// Returns true if the instruction may be a suitable candidate for LICM. +/// e.g. If the instruction is a call, then it's obviously not safe to hoist it. bool MachineLICM::IsLICMCandidate(MachineInstr &I) { // Check if it's safe to move the instruction. bool DontMoveAcrossStore = true; @@ -944,16 +882,16 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { // from constant memory are not safe to speculate all the time, for example // indexed load from a jump table. // Stores and side effects are already checked by isSafeToMove. - if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) && + if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) && !IsGuaranteedToExecute(I.getParent())) return false; return true; } -/// IsLoopInvariantInst - Returns true if the instruction is loop -/// invariant. I.e., all virtual register operands are defined outside of the -/// loop, physical registers aren't accessed explicitly, and there are no side +/// Returns true if the instruction is loop invariant. +/// I.e., all virtual register operands are defined outside of the loop, +/// physical registers aren't accessed explicitly, and there are no side /// effects that aren't captured by the operands or other flags. /// bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { @@ -1007,8 +945,8 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } -/// HasLoopPHIUse - Return true if the specified instruction is used by a -/// phi node and hoisting it could cause a copy to be inserted. +/// Return true if the specified instruction is used by a phi node and hoisting +/// it could cause a copy to be inserted. bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { SmallVector<const MachineInstr*, 8> Work(1, MI); do { @@ -1042,9 +980,8 @@ bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { return false; } -/// HasHighOperandLatency - Compute operand latency between a def of 'Reg' -/// and an use in the current loop, return true if the target considered -/// it 'high'. +/// Compute operand latency between a def of 'Reg' and an use in the current +/// loop, return true if the target considered it high. 
bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg) const { if (MRI->use_nodbg_empty(Reg)) @@ -1074,8 +1011,8 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, return false; } -/// IsCheapInstruction - Return true if the instruction is marked "cheap" or -/// the operand latency between its def and a use is one or less. +/// Return true if the instruction is marked "cheap" or the operand latency +/// between its def and a use is one or less. bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike()) return true; @@ -1099,9 +1036,8 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { return isCheap; } -/// CanCauseHighRegPressure - Visit BBs from header to current BB, check -/// if hoisting an instruction of the given cost matrix can cause high -/// register pressure. +/// Visit BBs from header to current BB, check if hoisting an instruction of the +/// given cost matrix can cause high register pressure. bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost, bool CheapInstr) { for (const auto &RPIdAndCost : Cost) { @@ -1124,9 +1060,9 @@ bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost, return false; } -/// UpdateBackTraceRegPressure - Traverse the back trace from header to the -/// current block and update their register pressures to reflect the effect -/// of hoisting MI from the current block to the preheader. +/// Traverse the back trace from header to the current block and update their +/// register pressures to reflect the effect of hoisting MI from the current +/// block to the preheader. void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { // First compute the 'cost' of the instruction, i.e. its contribution // to register pressure. @@ -1139,8 +1075,8 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { RP[RPIdAndCost.first] += RPIdAndCost.second; } -/// IsProfitableToHoist - Return true if it is potentially profitable to hoist -/// the given loop invariant. +/// Return true if it is potentially profitable to hoist the given loop +/// invariant. bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (MI.isImplicitDef()) return true; @@ -1230,6 +1166,9 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { return true; } +/// Unfold a load from the given machineinstr if the load itself could be +/// hoisted. Return the unfolded and hoistable load, or null if the load +/// couldn't be unfolded or if it wouldn't be hoistable. MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { // Don't unfold simple loads. if (MI->canFoldAsLoad()) @@ -1287,6 +1226,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { return NewMIs[0]; } +/// Initialize the CSE map with instructions that are in the current loop +/// preheader that may become duplicates of instructions that are hoisted +/// out of the loop. void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { const MachineInstr *MI = &*I; @@ -1295,6 +1237,8 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { } } +/// Find an instruction amount PrevMIs that is a duplicate of MI. +/// Return this instruction if it's found. 
const MachineInstr* MachineLICM::LookForDuplicate(const MachineInstr *MI, std::vector<const MachineInstr*> &PrevMIs) { @@ -1306,6 +1250,10 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI, return nullptr; } +/// Given a LICM'ed instruction, look for an instruction on the preheader that +/// computes the same value. If it's found, do a RAU on with the definition of +/// the existing instruction rather than hoisting the instruction to the +/// preheader. bool MachineLICM::EliminateCSE(MachineInstr *MI, DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate @@ -1363,8 +1311,8 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, return false; } -/// MayCSE - Return true if the given instruction will be CSE'd if it's -/// hoisted out of the loop. +/// Return true if the given instruction will be CSE'd if it's hoisted out of +/// the loop. bool MachineLICM::MayCSE(MachineInstr *MI) { unsigned Opcode = MI->getOpcode(); DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator @@ -1377,9 +1325,9 @@ bool MachineLICM::MayCSE(MachineInstr *MI) { return LookForDuplicate(MI, CI->second) != nullptr; } -/// Hoist - When an instruction is found to use only loop invariant operands +/// When an instruction is found to use only loop invariant operands /// that are safe to hoist, this instruction is called to do the dirty work. -/// +/// It returns true if the instruction is hoisted. bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // First check whether we should hoist this instruction. if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { @@ -1441,6 +1389,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { return true; } +/// Get the preheader for the current loop, splitting a critical edge if needed. MachineBasicBlock *MachineLICM::getCurPreheader() { // Determine the block to which to hoist instructions. If we can't find a // suitable loop predecessor, we can't do any hoisting. 
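The MachineLICM hunks above keep repeating the same two-part test: an instruction may be hoisted into the preheader when every operand it reads is defined outside the loop (IsLoopInvariantInst) and the move is safe, e.g. it has no call-like side effects (IsLICMCandidate), with the dominator-tree walk visiting definitions before uses so the whole loop body is handled in one pass. The following is only a minimal standalone sketch of that idea on a toy instruction list; Inst, hoistInvariants and the value names are made up for illustration and are not part of this commit or of the LLVM API.

    #include <iostream>
    #include <string>
    #include <unordered_set>
    #include <utility>
    #include <vector>

    struct Inst {
      std::string Def;                // value this instruction defines
      std::vector<std::string> Uses;  // values it reads
      bool HasSideEffects;            // e.g. a call or a store
    };

    // Hoist every loop-body instruction whose operands are all defined outside
    // the loop and which is safe to move, appending it to the preheader. Body
    // is assumed to list definitions before uses, as the dominator-tree walk in
    // the real pass guarantees, so a single forward pass is enough.
    static void hoistInvariants(std::vector<Inst> &Preheader,
                                std::vector<Inst> &Body) {
      std::unordered_set<std::string> DefinedInLoop;
      for (const Inst &I : Body)
        DefinedInLoop.insert(I.Def);

      std::vector<Inst> NewBody;
      for (const Inst &I : Body) {
        bool Invariant = !I.HasSideEffects;
        for (const std::string &U : I.Uses)
          if (DefinedInLoop.count(U))
            Invariant = false;
        if (Invariant) {
          Preheader.push_back(I);      // hoist it
          DefinedInLoop.erase(I.Def);  // its value now comes from the preheader
        } else {
          NewBody.push_back(I);
        }
      }
      Body = std::move(NewBody);
    }

    int main() {
      std::vector<Inst> Preheader = {{"a", {}, false}};
      std::vector<Inst> Body = {
          {"i", {"i"}, false},     // induction update: uses a loop-defined value
          {"b", {"a"}, false},     // invariant: only uses the preheader value "a"
          {"c", {"b"}, false},     // becomes invariant once "b" has been hoisted
          {"d", {"c", "i"}, true}, // reads "i" and has side effects, stays put
      };
      hoistInvariants(Preheader, Body);
      std::cout << "hoisted " << Preheader.size() - 1 << " instruction(s), "
                << Body.size() << " left in the loop\n"; // 2 hoisted, 2 remain
    }

Erasing a hoisted definition from DefinedInLoop is what lets "c" follow "b" out of the loop in the same pass; that is the practical payoff of the definitions-before-uses ordering described in the HoistOutOfLoop comment. The real pass layers IsProfitableToHoist, the register-pressure bookkeeping and CSE against the preheader on top of this basic test, all of which the sketch deliberately leaves out.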
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp index ce6abdd870b3..2f5c9e05cc7b 100644 --- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -37,7 +37,7 @@ char &llvm::MachineLoopInfoID = MachineLoopInfo::ID; bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { releaseMemory(); - LI.Analyze(getAnalysis<MachineDominatorTree>().getBase()); + LI.analyze(getAnalysis<MachineDominatorTree>().getBase()); return false; } @@ -51,11 +51,11 @@ MachineBasicBlock *MachineLoop::getTopBlock() { MachineBasicBlock *TopMBB = getHeader(); MachineFunction::iterator Begin = TopMBB->getParent()->begin(); if (TopMBB != Begin) { - MachineBasicBlock *PriorMBB = std::prev(MachineFunction::iterator(TopMBB)); + MachineBasicBlock *PriorMBB = &*std::prev(TopMBB->getIterator()); while (contains(PriorMBB)) { TopMBB = PriorMBB; if (TopMBB == Begin) break; - PriorMBB = std::prev(MachineFunction::iterator(TopMBB)); + PriorMBB = &*std::prev(TopMBB->getIterator()); } } return TopMBB; @@ -65,11 +65,12 @@ MachineBasicBlock *MachineLoop::getBottomBlock() { MachineBasicBlock *BotMBB = getHeader(); MachineFunction::iterator End = BotMBB->getParent()->end(); if (BotMBB != std::prev(End)) { - MachineBasicBlock *NextMBB = std::next(MachineFunction::iterator(BotMBB)); + MachineBasicBlock *NextMBB = &*std::next(BotMBB->getIterator()); while (contains(NextMBB)) { BotMBB = NextMBB; - if (BotMBB == std::next(MachineFunction::iterator(BotMBB))) break; - NextMBB = std::next(MachineFunction::iterator(BotMBB)); + if (BotMBB == &*std::next(BotMBB->getIterator())) + break; + NextMBB = &*std::next(BotMBB->getIterator()); } } return BotMBB; diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index 6a206249d834..1956a701d8e6 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -9,12 +9,12 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/ADT/PointerUnion.h" -#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" @@ -35,7 +35,7 @@ char MachineModuleInfo::ID = 0; MachineModuleInfoImpl::~MachineModuleInfoImpl() {} namespace llvm { -class MMIAddrLabelMapCallbackPtr : CallbackVH { +class MMIAddrLabelMapCallbackPtr final : CallbackVH { MMIAddrLabelMap *Map; public: MMIAddrLabelMapCallbackPtr() : Map(nullptr) {} @@ -209,9 +209,8 @@ bool MachineModuleInfo::doInitialization(Module &M) { CurCallSite = 0; CallsEHReturn = false; CallsUnwindInit = false; + HasEHFunclets = false; DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false; - // Always emit some info, by default "no personality" info. 
- Personalities.push_back(nullptr); PersonalityTypeCache = EHPersonality::Unknown; AddrLabelSymbols = nullptr; TheModule = nullptr; @@ -249,6 +248,7 @@ void MachineModuleInfo::EndFunction() { FilterEnds.clear(); CallsEHReturn = false; CallsUnwindInit = false; + HasEHFunclets = false; VariableDbgInfos.clear(); } @@ -314,32 +314,11 @@ MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) { return LandingPadLabel; } -/// addPersonality - Provide the personality function for the exception -/// information. -void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, - const Function *Personality) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - LP.Personality = Personality; - addPersonality(Personality); -} - void MachineModuleInfo::addPersonality(const Function *Personality) { for (unsigned i = 0; i < Personalities.size(); ++i) if (Personalities[i] == Personality) return; - - // If this is the first personality we're adding go - // ahead and add it at the beginning. - if (!Personalities[0]) - Personalities[0] = Personality; - else - Personalities.push_back(Personality); -} - -void MachineModuleInfo::addWinEHState(MachineBasicBlock *LandingPad, - int State) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - LP.WinEHState = State; + Personalities.push_back(Personality); } /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. @@ -481,56 +460,3 @@ try_next:; FilterIds.push_back(0); // terminator return FilterID; } - -/// getPersonality - Return the personality function for the current function. -const Function *MachineModuleInfo::getPersonality() const { - for (const LandingPadInfo &LPI : LandingPads) - if (LPI.Personality) - return LPI.Personality; - return nullptr; -} - -EHPersonality MachineModuleInfo::getPersonalityType() { - if (PersonalityTypeCache == EHPersonality::Unknown) { - if (const Function *F = getPersonality()) - PersonalityTypeCache = classifyEHPersonality(F); - } - return PersonalityTypeCache; -} - -/// getPersonalityIndex - Return unique index for current personality -/// function. NULL/first personality function should always get zero index. -unsigned MachineModuleInfo::getPersonalityIndex() const { - const Function* Personality = nullptr; - - // Scan landing pads. If there is at least one non-NULL personality - use it. - for (unsigned i = 0, e = LandingPads.size(); i != e; ++i) - if (LandingPads[i].Personality) { - Personality = LandingPads[i].Personality; - break; - } - - for (unsigned i = 0, e = Personalities.size(); i < e; ++i) { - if (Personalities[i] == Personality) - return i; - } - - // This will happen if the current personality function is - // in the zero index. 
- return 0; -} - -const Function *MachineModuleInfo::getWinEHParent(const Function *F) const { - StringRef WinEHParentName = - F->getFnAttribute("wineh-parent").getValueAsString(); - if (WinEHParentName.empty() || WinEHParentName == F->getName()) - return F; - return F->getParent()->getFunction(WinEHParentName); -} - -WinEHFuncInfo &MachineModuleInfo::getWinEHFuncInfo(const Function *F) { - auto &Ptr = FuncInfoMap[getWinEHParent(F)]; - if (!Ptr) - Ptr.reset(new WinEHFuncInfo); - return *Ptr; -} diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp index e883ce523134..03c82f46da63 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -27,13 +27,11 @@ void MachineRegisterInfo::Delegate::anchor() {} MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF) : MF(MF), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true), TracksSubRegLiveness(false) { + unsigned NumRegs = getTargetRegisterInfo()->getNumRegs(); VRegInfo.reserve(256); RegAllocHints.reserve(256); - UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits()); - UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs()); - - // Create the physreg use/def lists. - PhysRegUseDefLists.resize(getTargetRegisterInfo()->getNumRegs(), nullptr); + UsedPhysRegMask.resize(NumRegs); + PhysRegUseDefLists.reset(new MachineOperand*[NumRegs]()); } /// setRegClass - Set the register class of the specified virtual register. @@ -117,6 +115,8 @@ void MachineRegisterInfo::clearVirtRegs() { } #endif VRegInfo.clear(); + for (auto &I : LiveIns) + I.second = 0; } void MachineRegisterInfo::verifyUseList(unsigned Reg) const { @@ -394,8 +394,7 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, } } -unsigned MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const -{ +LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const { // Lane masks are only defined for vregs. 
assert(TargetRegisterInfo::isVirtualRegister(Reg)); const TargetRegisterClass &TRC = *getRegClass(Reg); @@ -468,11 +467,8 @@ static bool isNoReturnDef(const MachineOperand &MO) { if (MF.getFunction()->hasFnAttribute(Attribute::UWTable)) return false; const Function *Called = getCalledFunction(MI); - if (Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn) - || !Called->hasFnAttribute(Attribute::NoUnwind)) - return false; - - return true; + return !(Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn) || + !Called->hasFnAttribute(Attribute::NoUnwind)); } bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg) const { @@ -488,3 +484,15 @@ bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg) const { } return false; } + +bool MachineRegisterInfo::isPhysRegUsed(unsigned PhysReg) const { + if (UsedPhysRegMask.test(PhysReg)) + return true; + const TargetRegisterInfo *TRI = getTargetRegisterInfo(); + for (MCRegAliasIterator AliasReg(PhysReg, TRI, true); AliasReg.isValid(); + ++AliasReg) { + if (!reg_nodbg_empty(*AliasReg)) + return true; + } + return false; +} diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp index a48e54caf3fe..bcee15c7c75f 100644 --- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -49,6 +49,11 @@ DumpCriticalPathLength("misched-dcpl", cl::Hidden, static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, cl::desc("Pop up a window to show MISched dags after they are processed")); +/// In some situations a few uninteresting nodes depend on nearly all other +/// nodes in the graph, provide a cutoff to hide them. +static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, + cl::desc("Hide nodes with more predecessor/successor than cutoff")); + static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); @@ -106,7 +111,7 @@ public: void print(raw_ostream &O, const Module* = nullptr) const override; protected: - void scheduleRegions(ScheduleDAGInstrs &Scheduler); + void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags); }; /// MachineScheduler runs after coalescing and before register allocation. @@ -146,7 +151,7 @@ char &llvm::MachineSchedulerID = MachineScheduler::ID; INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler", "Machine Instruction Scheduler", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler", @@ -161,7 +166,7 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequiredID(MachineDominatorsID); AU.addRequired<MachineLoopInfo>(); - AU.addRequired<AliasAnalysis>(); + AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<TargetPassConfig>(); AU.addRequired<SlotIndexes>(); AU.addPreserved<SlotIndexes>(); @@ -315,14 +320,14 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { } else if (!mf.getSubtarget().enableMachineScheduler()) return false; - DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs())); + DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs())); // Initialize the context of the pass. 
MF = &mf; MLI = &getAnalysis<MachineLoopInfo>(); MDT = &getAnalysis<MachineDominatorTree>(); PassConfig = &getAnalysis<TargetPassConfig>(); - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); LIS = &getAnalysis<LiveIntervals>(); @@ -335,7 +340,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Instantiate the selected scheduler for this target, function, and // optimization level. std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); - scheduleRegions(*Scheduler); + scheduleRegions(*Scheduler, false); DEBUG(LIS->dump()); if (VerifyScheduling) @@ -363,7 +368,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Instantiate the selected scheduler for this target, function, and // optimization level. std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler()); - scheduleRegions(*Scheduler); + scheduleRegions(*Scheduler, true); if (VerifyScheduling) MF->verify(this, "After post machine scheduling."); @@ -383,15 +388,14 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, - const TargetInstrInfo *TII, - bool IsPostRA) { + const TargetInstrInfo *TII) { return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF); } /// Main driver for both MachineScheduler and PostMachineScheduler. -void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { +void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, + bool FixKillFlags) { const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - bool IsPostRA = Scheduler.isPostRA(); // Visit all machine basic blocks. // @@ -400,7 +404,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); MBB != MBBEnd; ++MBB) { - Scheduler.startBlock(MBB); + Scheduler.startBlock(&*MBB); #ifndef NDEBUG if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName()) @@ -429,7 +433,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { // Avoid decrementing RegionEnd for blocks with no terminator. if (RegionEnd != MBB->end() || - isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) { + isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) { --RegionEnd; // Count the boundary instruction. --RemainingInstrs; @@ -440,14 +444,14 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { unsigned NumRegionInstrs = 0; MachineBasicBlock::iterator I = RegionEnd; for(;I != MBB->begin(); --I, --RemainingInstrs) { - if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA)) + if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII)) break; if (!I->isDebugValue()) ++NumRegionInstrs; } // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. - Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs); + Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs); // Skip empty scheduling regions (0 or 1 schedulable instructions). if (I == RegionEnd || I == std::prev(RegionEnd)) { @@ -456,8 +460,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { Scheduler.exitRegion(); continue; } - DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ? 
"PostRA " : "") - << "MI Scheduling **********\n"); + DEBUG(dbgs() << "********** MI Scheduling **********\n"); DEBUG(dbgs() << MF->getName() << ":BB#" << MBB->getNumber() << " " << MBB->getName() << "\n From: " << *I << " To: "; @@ -484,11 +487,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { } assert(RemainingInstrs == 0 && "Instruction count mismatch!"); Scheduler.finishBlock(); - if (Scheduler.isPostRA()) { - // FIXME: Ideally, no further passes should rely on kill flags. However, - // thumb2 size reduction is currently an exception. - Scheduler.fixupKills(MBB); - } + // FIXME: Ideally, no further passes should rely on kill flags. However, + // thumb2 size reduction is currently an exception, so the PostMIScheduler + // needs to do this. + if (FixKillFlags) + Scheduler.fixupKills(&*MBB); } Scheduler.finalizeSchedule(); } @@ -499,7 +502,7 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const { LLVM_DUMP_METHOD void ReadyQueue::dump() { - dbgs() << Name << ": "; + dbgs() << "Queue " << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) dbgs() << Queue[i]->NodeNum << " "; dbgs() << "\n"; @@ -660,6 +663,9 @@ bool ScheduleDAGMI::checkSchedLimit() { /// does not consider liveness or register pressure. It is useful for PostRA /// scheduling and potentially other custom schedulers. void ScheduleDAGMI::schedule() { + DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n"); + DEBUG(SchedImpl->dumpPolicy()); + // Build the DAG. buildSchedGraph(AA); @@ -682,7 +688,11 @@ void ScheduleDAGMI::schedule() { initQueues(TopRoots, BotRoots); bool IsTopNode = false; - while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + while (true) { + DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) break; + assert(!SU->isScheduled && "Node already scheduled"); if (!checkSchedLimit()) break; @@ -900,6 +910,13 @@ void ScheduleDAGMILive::initRegPressure() { updatePressureDiffs(LiveUses); } + DEBUG( + dbgs() << "Top Pressure:\n"; + dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); + dbgs() << "Bottom Pressure:\n"; + dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); + ); + assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); // Cache the list of excess pressure sets in this region. This will also track @@ -976,18 +993,24 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) { } // RegisterPressureTracker guarantees that readsReg is true for LiveUses. assert(VNI && "No live value at use."); - for (VReg2UseMap::iterator - UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { - SUnit *SU = UI->SU; - DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " - << *SU->getInstr()); + for (const VReg2SUnit &V2SU + : make_range(VRegUses.find(Reg), VRegUses.end())) { + SUnit *SU = V2SU.SU; // If this use comes before the reaching def, it cannot be a last use, so // descrease its pressure change. 
if (!SU->isScheduled && SU != &ExitSU) { LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(SU->getInstr())); - if (LRQ.valueIn() == VNI) - getPressureDiff(SU).addPressureChange(Reg, true, &MRI); + if (LRQ.valueIn() == VNI) { + PressureDiff &PDiff = getPressureDiff(SU); + PDiff.addPressureChange(Reg, true, &MRI); + DEBUG( + dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " + << *SU->getInstr(); + dbgs() << " to "; + PDiff.dump(*TRI); + ); + } } } } @@ -998,12 +1021,14 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) { /// only includes instructions that have DAG nodes, not scheduling boundaries. /// /// This is a skeletal driver, with all the functionality pushed into helpers, -/// so that it can be easilly extended by experimental schedulers. Generally, +/// so that it can be easily extended by experimental schedulers. Generally, /// implementing MachineSchedStrategy should be sufficient to implement a new /// scheduling algorithm. However, if a scheduler further subclasses /// ScheduleDAGMILive then it will want to override this virtual method in order /// to update any specialized state. void ScheduleDAGMILive::schedule() { + DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n"); + DEBUG(SchedImpl->dumpPolicy()); buildDAGWithRegPressure(); Topo.InitDAGTopologicalSorting(); @@ -1017,8 +1042,16 @@ void ScheduleDAGMILive::schedule() { // This may initialize a DFSResult to be used for queue priority. SchedImpl->initialize(this); - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); + DEBUG( + for (const SUnit &SU : SUnits) { + SU.dumpAll(this); + if (ShouldTrackPressure) { + dbgs() << " Pressure Diff : "; + getPressureDiff(&SU).dump(*TRI); + } + dbgs() << '\n'; + } + ); if (ViewMISchedDAGs) viewGraph(); // Initialize ready queues now that the DAG and priority data are finalized. @@ -1030,7 +1063,11 @@ void ScheduleDAGMILive::schedule() { } bool IsTopNode = false; - while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + while (true) { + DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) break; + assert(!SU->isScheduled && "Node already scheduled"); if (!checkSchedLimit()) break; @@ -1149,14 +1186,15 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { unsigned LiveOutHeight = DefSU->getHeight(); unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency; // Visit all local users of the vreg def. - for (VReg2UseMap::iterator - UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { - if (UI->SU == &ExitSU) + for (const VReg2SUnit &V2SU + : make_range(VRegUses.find(Reg), VRegUses.end())) { + SUnit *SU = V2SU.SU; + if (SU == &ExitSU) continue; // Only consider uses of the phi. LiveQueryResult LRQ = - LI.Query(LIS->getInstructionIndex(UI->SU->getInstr())); + LI.Query(LIS->getInstructionIndex(SU->getInstr())); if (!LRQ.valueIn()->isPHIDef()) continue; @@ -1164,10 +1202,10 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { // overestimate in strange cases. This allows cyclic latency to be // estimated as the minimum slack of the vreg's depth or height. 
unsigned CyclicLatency = 0; - if (LiveOutDepth > UI->SU->getDepth()) - CyclicLatency = LiveOutDepth - UI->SU->getDepth(); + if (LiveOutDepth > SU->getDepth()) + CyclicLatency = LiveOutDepth - SU->getDepth(); - unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency; + unsigned LiveInHeight = SU->getHeight() + DefSU->Latency; if (LiveInHeight > LiveOutHeight) { if (LiveInHeight - LiveOutHeight < CyclicLatency) CyclicLatency = LiveInHeight - LiveOutHeight; @@ -1176,7 +1214,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { CyclicLatency = 0; DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" - << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n"); + << SU->NodeNum << ") = " << CyclicLatency << "c\n"); if (CyclicLatency > MaxCyclicLatency) MaxCyclicLatency = CyclicLatency; } @@ -1203,6 +1241,11 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { // Update top scheduled pressure. TopRPTracker.advance(); assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); + DEBUG( + dbgs() << "Top Pressure:\n"; + dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); + ); + updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure); } } @@ -1225,6 +1268,11 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { SmallVector<unsigned, 8> LiveUses; BotRPTracker.recede(&LiveUses); assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); + DEBUG( + dbgs() << "Bottom Pressure:\n"; + dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); + ); + updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure); updatePressureDiffs(LiveUses); } @@ -1349,25 +1397,49 @@ namespace { /// \brief Post-process the DAG to create cluster edges between instructions /// that may be fused by the processor into a single operation. class MacroFusion : public ScheduleDAGMutation { - const TargetInstrInfo *TII; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; public: - MacroFusion(const TargetInstrInfo *tii): TII(tii) {} + MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) + : TII(TII), TRI(TRI) {} void apply(ScheduleDAGMI *DAG) override; }; } // anonymous +/// Returns true if \p MI reads a register written by \p Other. +static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI, + const MachineInstr &Other) { + for (const MachineOperand &MO : MI.uses()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + + unsigned Reg = MO.getReg(); + if (Other.modifiesRegister(Reg, &TRI)) + return true; + } + return false; +} + /// \brief Callback from DAG postProcessing to create cluster edges to encourage /// fused operations. void MacroFusion::apply(ScheduleDAGMI *DAG) { // For now, assume targets can only fuse with the branch. - MachineInstr *Branch = DAG->ExitSU.getInstr(); + SUnit &ExitSU = DAG->ExitSU; + MachineInstr *Branch = ExitSU.getInstr(); if (!Branch) return; - for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) { - SUnit *SU = &DAG->SUnits[--Idx]; - if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch)) + for (SUnit &SU : DAG->SUnits) { + // SUnits with successors can't be schedule in front of the ExitSU. + if (!SU.Succs.empty()) + continue; + // We only care if the node writes to a register that the branch reads. + MachineInstr *Pred = SU.getInstr(); + if (!HasDataDep(TRI, *Branch, *Pred)) + continue; + + if (!TII.shouldScheduleAdjacent(Pred, Branch)) continue; // Create a single weak edge from SU to ExitSU. 
The only effect is to cause @@ -1376,11 +1448,11 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) { // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling // of SU, we could create an artificial edge from the deepest root, but it // hasn't been needed yet. - bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster)); + bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster)); (void)Success; assert(Success && "No DAG nodes should be reachable from ExitSU"); - DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n"); + DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n"); break; } } @@ -2277,7 +2349,7 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { Latency = Cand.SU->getDepth(); break; } - dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); + dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); if (P.isValid()) dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) << ":" << P.getUnitInc() << " "; @@ -2438,6 +2510,14 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, } } +void GenericScheduler::dumpPolicy() { + dbgs() << "GenericScheduler RegionPolicy: " + << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure + << " OnlyTopDown=" << RegionPolicy.OnlyTopDown + << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp + << "\n"; +} + /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic /// critical path by more cycles than it takes to drain the instruction buffer. /// We estimate an upper bounds on in-flight instructions as: @@ -2499,11 +2579,13 @@ static bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason) { - int TryRank = TryP.getPSetOrMax(); - int CandRank = CandP.getPSetOrMax(); + GenericSchedulerBase::CandReason Reason, + const TargetRegisterInfo *TRI, + const MachineFunction &MF) { + unsigned TryPSet = TryP.getPSetOrMax(); + unsigned CandPSet = CandP.getPSetOrMax(); // If both candidates affect the same set, go with the smallest increase. - if (TryRank == CandRank) { + if (TryPSet == CandPSet) { return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, Reason); } @@ -2513,6 +2595,13 @@ static bool tryPressure(const PressureChange &TryP, Reason)) { return true; } + + int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) : + std::numeric_limits<int>::max(); + + int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) : + std::numeric_limits<int>::max(); + // If the candidates are decreasing pressure, reverse priority. if (TryP.getUnitInc() < 0) std::swap(TryRank, CandRank); @@ -2597,7 +2686,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, } } DEBUG(if (TryCand.RPDelta.Excess.isValid()) - dbgs() << " SU(" << TryCand.SU->NodeNum << ") " + dbgs() << " Try SU(" << TryCand.SU->NodeNum << ") " << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet()) << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n"); @@ -2615,13 +2704,15 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // Avoid exceeding the target's limit. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, - TryCand, Cand, RegExcess)) + TryCand, Cand, RegExcess, TRI, + DAG->MF)) return; // Avoid increasing the max critical pressure in the scheduled region. 
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, - TryCand, Cand, RegCritical)) + TryCand, Cand, RegCritical, TRI, + DAG->MF)) return; // For loops that are acyclic path limited, aggressively schedule for latency. @@ -2657,7 +2748,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // Avoid increasing the max pressure of the entire region. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, - TryCand, Cand, RegMax)) + TryCand, Cand, RegMax, TRI, + DAG->MF)) return; // Avoid critical resource consumption and balance the schedule. @@ -2672,8 +2764,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // Avoid serializing long latency dependence chains. // For acyclic path limited loops, latency was already checked above. - if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited - && tryLatency(TryCand, Cand, Zone)) { + if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency && + !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone)) { return; } @@ -2727,12 +2819,12 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { IsTopNode = false; - DEBUG(dbgs() << "Pick Bot NOCAND\n"); + DEBUG(dbgs() << "Pick Bot ONLY1\n"); return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { IsTopNode = true; - DEBUG(dbgs() << "Pick Top NOCAND\n"); + DEBUG(dbgs() << "Pick Top ONLY1\n"); return SU; } CandPolicy NoPolicy; @@ -2887,7 +2979,7 @@ static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) { if (EnableLoadCluster && DAG->TII->enableClusterLoads()) DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI)); if (EnableMacroFusion) - DAG->addMutation(make_unique<MacroFusion>(DAG->TII)); + DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI)); return DAG; } @@ -3254,12 +3346,10 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { } static bool isNodeHidden(const SUnit *Node) { - return (Node->Preds.size() > 10 || Node->Succs.size() > 10); - } - - static bool hasNodeAddressLabel(const SUnit *Node, - const ScheduleDAG *Graph) { - return false; + if (ViewMISchedCutoff == 0) + return false; + return (Node->Preds.size() > ViewMISchedCutoff + || Node->Succs.size() > ViewMISchedCutoff); } /// If you want to override the dot attributes printed for a particular diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp index 1b9be50068a9..5e6d6190c638 100644 --- a/contrib/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -87,7 +87,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired<AliasAnalysis>(); + AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachinePostDominatorTree>(); AU.addRequired<MachineLoopInfo>(); @@ -150,7 +150,7 @@ INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink", "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineSinking, "machine-sink", "Machine code sinking", false, false) @@ -268,7 +268,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { 
PDT = &getAnalysis<MachinePostDominatorTree>(); LI = &getAnalysis<MachineLoopInfo>(); MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr; - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); bool EverMadeChange = false; @@ -667,7 +667,7 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // It's not safe to sink instructions to EH landing pad. Control flow into // landing pad is implicitly defined. - if (SuccToSinkTo && SuccToSinkTo->isLandingPad()) + if (SuccToSinkTo && SuccToSinkTo->isEHPad()) return nullptr; return SuccToSinkTo; @@ -686,7 +686,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore, if (!MI->isSafeToMove(AA, SawStore)) return false; - // Convergent operations may only be moved to control equivalent locations. + // Convergent operations may not be made control-dependent on additional + // values. if (MI->isConvergent()) return false; diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp index d9a6b68462eb..f7edacd5ebaf 100644 --- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -724,13 +724,12 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI, // Update RegUnits to reflect live registers after UseMI. // First kills. - for (unsigned i = 0, e = Kills.size(); i != e; ++i) - for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units) + for (unsigned Kill : Kills) + for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units) RegUnits.erase(*Units); // Second, live defs. - for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) { - unsigned DefOp = LiveDefOps[i]; + for (unsigned DefOp : LiveDefOps) { for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI); Units.isValid(); ++Units) { LiveRegUnit &LRU = RegUnits[*Units]; @@ -756,8 +755,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { assert(TBI.HasValidInstrDepths && "Missing depth info"); assert(TBI.HasValidInstrHeights && "Missing height info"); unsigned MaxLen = 0; - for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { - const LiveInReg &LIR = TBI.LiveIns[i]; + for (const LiveInReg &LIR : TBI.LiveIns) { if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg)) continue; const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index ca35ec5fdcf8..cdcd8eb4fbdf 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" @@ -42,7 +43,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -204,18 +204,19 @@ namespace { void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB); void visitMachineFunctionAfter(); + template <typename T> void report(const char *msg, ilist_iterator<T> I) { + report(msg, &*I); + } void report(const char *msg, const MachineFunction *MF); void 
report(const char *msg, const MachineBasicBlock *MBB); void report(const char *msg, const MachineInstr *MI); void report(const char *msg, const MachineOperand *MO, unsigned MONum); - void report(const char *msg, const MachineFunction *MF, - const LiveInterval &LI); - void report(const char *msg, const MachineBasicBlock *MBB, - const LiveInterval &LI); - void report(const char *msg, const MachineFunction *MF, - const LiveRange &LR, unsigned Reg, unsigned LaneMask); - void report(const char *msg, const MachineBasicBlock *MBB, - const LiveRange &LR, unsigned Reg, unsigned LaneMask); + + void report_context(const LiveInterval &LI) const; + void report_context(const LiveRange &LR, unsigned Reg, + LaneBitmask LaneMask) const; + void report_context(const LiveRange::Segment &S) const; + void report_context(const VNInfo &VNI) const; void verifyInlineAsm(const MachineInstr *MI); @@ -233,9 +234,11 @@ namespace { void verifyLiveRangeSegment(const LiveRange&, const LiveRange::const_iterator I, unsigned, unsigned); - void verifyLiveRange(const LiveRange&, unsigned, unsigned LaneMask = 0); + void verifyLiveRange(const LiveRange&, unsigned, LaneBitmask LaneMask = 0); void verifyStackFrame(); + + void verifySlotIndexes() const; }; struct MachineVerifierPass : public MachineFunctionPass { @@ -273,6 +276,19 @@ void MachineFunction::verify(Pass *p, const char *Banner) const { .runOnMachineFunction(const_cast<MachineFunction&>(*this)); } +void MachineVerifier::verifySlotIndexes() const { + if (Indexes == nullptr) + return; + + // Ensure the IdxMBB list is sorted by slot indexes. + SlotIndex Last; + for (SlotIndexes::MBBIndexIterator I = Indexes->MBBIndexBegin(), + E = Indexes->MBBIndexEnd(); I != E; ++I) { + assert(!Last.isValid() || I->first > Last); + Last = I->first; + } +} + bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { foundErrors = 0; @@ -295,10 +311,12 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>(); } + verifySlotIndexes(); + visitMachineFunctionBefore(); for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end(); MFI!=MFE; ++MFI) { - visitMachineBasicBlockBefore(MFI); + visitMachineBasicBlockBefore(&*MFI); // Keep track of the current bundle header. const MachineInstr *CurBundle = nullptr; // Do we expect the next instruction to be part of the same bundle? @@ -306,7 +324,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(), MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { - if (MBBI->getParent() != MFI) { + if (MBBI->getParent() != &*MFI) { report("Bad instruction parent pointer", MFI); errs() << "Instruction: " << *MBBI; continue; @@ -315,20 +333,22 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { // Check for consistent bundle flags. if (InBundle && !MBBI->isBundledWithPred()) report("Missing BundledPred flag, " - "BundledSucc was set on predecessor", MBBI); + "BundledSucc was set on predecessor", + &*MBBI); if (!InBundle && MBBI->isBundledWithPred()) report("BundledPred flag is set, " - "but BundledSucc not set on predecessor", MBBI); + "but BundledSucc not set on predecessor", + &*MBBI); // Is this a bundle header? 
if (!MBBI->isInsideBundle()) { if (CurBundle) visitMachineBundleAfter(CurBundle); - CurBundle = MBBI; + CurBundle = &*MBBI; visitMachineBundleBefore(CurBundle); } else if (!CurBundle) report("No bundle header", MBBI); - visitMachineInstrBefore(MBBI); + visitMachineInstrBefore(&*MBBI); for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { const MachineInstr &MI = *MBBI; const MachineOperand &Op = MI.getOperand(I); @@ -341,7 +361,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { visitMachineOperand(&Op, I); } - visitMachineInstrAfter(MBBI); + visitMachineInstrAfter(&*MBBI); // Was this the last bundled instruction? InBundle = MBBI->isBundledWithSucc(); @@ -350,7 +370,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { visitMachineBundleAfter(CurBundle); if (InBundle) report("BundledSucc flag set on last instruction in block", &MFI->back()); - visitMachineBasicBlockAfter(MFI); + visitMachineBasicBlockAfter(&*MFI); } visitMachineFunctionAfter(); @@ -375,7 +395,10 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { if (!foundErrors++) { if (Banner) errs() << "# " << Banner << '\n'; - MF->print(errs(), Indexes); + if (LiveInts != nullptr) + LiveInts->print(errs()); + else + MF->print(errs(), Indexes); } errs() << "*** Bad machine code: " << msg << " ***\n" << "- function: " << MF->getName() << "\n"; @@ -399,7 +422,8 @@ void MachineVerifier::report(const char *msg, const MachineInstr *MI) { errs() << "- instruction: "; if (Indexes && Indexes->hasIndex(MI)) errs() << Indexes->getInstructionIndex(MI) << '\t'; - MI->print(errs(), TM); + MI->print(errs(), /*SkipOpers=*/true); + errs() << '\n'; } void MachineVerifier::report(const char *msg, @@ -411,36 +435,24 @@ void MachineVerifier::report(const char *msg, errs() << "\n"; } -void MachineVerifier::report(const char *msg, const MachineFunction *MF, - const LiveInterval &LI) { - report(msg, MF); - errs() << "- interval: " << LI << '\n'; -} - -void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, - const LiveInterval &LI) { - report(msg, MBB); +void MachineVerifier::report_context(const LiveInterval &LI) const { errs() << "- interval: " << LI << '\n'; } -void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, - const LiveRange &LR, unsigned Reg, - unsigned LaneMask) { - report(msg, MBB); - errs() << "- liverange: " << LR << '\n'; +void MachineVerifier::report_context(const LiveRange &LR, unsigned Reg, + LaneBitmask LaneMask) const { errs() << "- register: " << PrintReg(Reg, TRI) << '\n'; if (LaneMask != 0) - errs() << "- lanemask: " << format("%04X\n", LaneMask); + errs() << "- lanemask: " << PrintLaneMask(LaneMask) << '\n'; + errs() << "- liverange: " << LR << '\n'; } -void MachineVerifier::report(const char *msg, const MachineFunction *MF, - const LiveRange &LR, unsigned Reg, - unsigned LaneMask) { - report(msg, MF); - errs() << "- liverange: " << LR << '\n'; - errs() << "- register: " << PrintReg(Reg, TRI) << '\n'; - if (LaneMask != 0) - errs() << "- lanemask: " << format("%04X\n", LaneMask); +void MachineVerifier::report_context(const LiveRange::Segment &S) const { + errs() << "- segment: " << S << '\n'; +} + +void MachineVerifier::report_context(const VNInfo &VNI) const { + errs() << "- ValNo: " << VNI.id << " (def " << VNI.def << ")\n"; } void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { @@ -507,11 +519,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MRI->isSSA()) { // If 
this block has allocatable physical registers live-in, check that // it is an entry block or landing pad. - for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), - LE = MBB->livein_end(); - LI != LE; ++LI) { - unsigned reg = *LI; - if (isAllocatable(reg) && !MBB->isLandingPad() && + for (const auto &LI : MBB->liveins()) { + if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() && MBB != MBB->getParent()->begin()) { report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB); } @@ -522,7 +531,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - if ((*I)->isLandingPad()) + if ((*I)->isEHPad()) LandingPadSuccs.insert(*I); if (!FunctionBlocks.count(*I)) report("MBB has successor that isn't part of the function.", MBB); @@ -547,10 +556,12 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { const MCAsmInfo *AsmInfo = TM->getMCAsmInfo(); const BasicBlock *BB = MBB->getBasicBlock(); + const Function *Fn = MF->getFunction(); if (LandingPadSuccs.size() > 1 && !(AsmInfo && AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj && - BB && isa<SwitchInst>(BB->getTerminator()))) + BB && isa<SwitchInst>(BB->getTerminator())) && + !isFuncletEHPersonality(classifyEHPersonality(Fn->getPersonalityFn()))) report("MBB has more than one landing pad successor", MBB); // Call AnalyzeBranch. If it succeeds, there several more conditions to check. @@ -562,7 +573,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { // check whether its answers match up with reality. if (!TBB && !FBB) { // Block falls through to its successor. - MachineFunction::const_iterator MBBI = MBB; + MachineFunction::const_iterator MBBI = MBB->getIterator(); ++MBBI; if (MBBI == MF->end()) { // It's possible that the block legitimately ends with a noreturn @@ -575,7 +586,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) { report("MBB exits via unconditional fall-through but doesn't have " "exactly one CFG successor!", MBB); - } else if (!MBB->isSuccessor(MBBI)) { + } else if (!MBB->isSuccessor(&*MBBI)) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } @@ -613,7 +624,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } } else if (TBB && !FBB && !Cond.empty()) { // Block conditionally branches somewhere, otherwise falls through. 
- MachineFunction::const_iterator MBBI = MBB; + MachineFunction::const_iterator MBBI = MBB->getIterator(); ++MBBI; if (MBBI == MF->end()) { report("MBB conditionally falls through out of function!", MBB); @@ -628,7 +639,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } else if (MBB->succ_size() != 2) { report("MBB exits via conditional branch/fall-through but doesn't have " "exactly two CFG successors!", MBB); - } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) { + } else if (!matchPair(MBB->succ_begin(), TBB, &*MBBI)) { report("MBB exits via conditional branch/fall-through but the CFG " "successors don't match the actual successors!", MBB); } @@ -680,13 +691,12 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } regsLive.clear(); - for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), - E = MBB->livein_end(); I != E; ++I) { - if (!TargetRegisterInfo::isPhysicalRegister(*I)) { + for (const auto &LI : MBB->liveins()) { + if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) { report("MBB live-in list contains non-physical register", MBB); continue; } - for (MCSubRegIterator SubRegs(*I, TRI, /*IncludeSelf=*/true); + for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) regsLive.insert(*SubRegs); } @@ -822,9 +832,12 @@ void MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const MachineInstr *MI = MO->getParent(); const MCInstrDesc &MCID = MI->getDesc(); + unsigned NumDefs = MCID.getNumDefs(); + if (MCID.getOpcode() == TargetOpcode::PATCHPOINT) + NumDefs = (MONum == 0 && MO->isReg()) ? NumDefs : 0; // The first MCID.NumDefs operands must be explicit register defines - if (MONum < MCID.getNumDefs()) { + if (MONum < NumDefs) { const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; if (!MO->isReg()) report("Explicit definition must be a register", MO, MONum); @@ -972,13 +985,38 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { case MachineOperand::MO_FrameIndex: if (LiveStks && LiveStks->hasInterval(MO->getIndex()) && LiveInts && !LiveInts->isNotInMIMap(MI)) { - LiveInterval &LI = LiveStks->getInterval(MO->getIndex()); + int FI = MO->getIndex(); + LiveInterval &LI = LiveStks->getInterval(FI); SlotIndex Idx = LiveInts->getInstructionIndex(MI); - if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) { + + bool stores = MI->mayStore(); + bool loads = MI->mayLoad(); + // For a memory-to-memory move, we need to check if the frame + // index is used for storing or loading, by inspecting the + // memory operands. 
+ if (stores && loads) { + for (auto *MMO : MI->memoperands()) { + const PseudoSourceValue *PSV = MMO->getPseudoValue(); + if (PSV == nullptr) continue; + const FixedStackPseudoSourceValue *Value = + dyn_cast<FixedStackPseudoSourceValue>(PSV); + if (Value == nullptr) continue; + if (Value->getFrameIndex() != FI) continue; + + if (MMO->isStore()) + loads = false; + else + stores = false; + break; + } + if (loads == stores) + report("Missing fixed stack memoperand.", MI); + } + if (loads && !LI.liveAt(Idx.getRegSlot(true))) { report("Instruction loads from dead spill slot", MO, MONum); errs() << "Live stack: " << LI << '\n'; } - if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) { + if (stores && !LI.liveAt(Idx.getRegSlot())) { report("Instruction stores to dead spill slot", MO, MONum); errs() << "Live stack: " << LI << '\n'; } @@ -1387,40 +1425,39 @@ void MachineVerifier::verifyLiveIntervals() { void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, const VNInfo *VNI, unsigned Reg, - unsigned LaneMask) { + LaneBitmask LaneMask) { if (VNI->isUnused()) return; const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def); if (!DefVNI) { - report("Valno not live at def and not marked unused", MF, LR, Reg, - LaneMask); - errs() << "Valno #" << VNI->id << '\n'; + report("Value not live at VNInfo def and not marked unused", MF); + report_context(LR, Reg, LaneMask); + report_context(*VNI); return; } if (DefVNI != VNI) { - report("Live segment at def has different valno", MF, LR, Reg, LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def - << " where valno #" << DefVNI->id << " is live\n"; + report("Live segment at def has different VNInfo", MF); + report_context(LR, Reg, LaneMask); + report_context(*VNI); return; } const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); if (!MBB) { - report("Invalid definition index", MF, LR, Reg, LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LR << '\n'; + report("Invalid VNInfo definition index", MF); + report_context(LR, Reg, LaneMask); + report_context(*VNI); return; } if (VNI->isPHIDef()) { if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { - report("PHIDef value is not defined at MBB start", MBB, LR, Reg, - LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def - << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; + report("PHIDef VNInfo is not defined at MBB start", MBB); + report_context(LR, Reg, LaneMask); + report_context(*VNI); } return; } @@ -1428,8 +1465,9 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, // Non-PHI def. const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); if (!MI) { - report("No instruction at def index", MBB, LR, Reg, LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + report("No instruction at VNInfo def index", MBB); + report_context(LR, Reg, LaneMask); + report_context(*VNI); return; } @@ -1457,60 +1495,67 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, if (!hasDef) { report("Defining instruction does not modify register", MI); - errs() << "Valno #" << VNI->id << " in " << LR << '\n'; + report_context(LR, Reg, LaneMask); + report_context(*VNI); } // Early clobber defs begin at USE slots, but other defs must begin at // DEF slots. 
if (isEarlyClobber) { if (!VNI->def.isEarlyClobber()) { - report("Early clobber def must be at an early-clobber slot", MBB, LR, - Reg, LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + report("Early clobber def must be at an early-clobber slot", MBB); + report_context(LR, Reg, LaneMask); + report_context(*VNI); } } else if (!VNI->def.isRegister()) { - report("Non-PHI, non-early clobber def must be at a register slot", - MBB, LR, Reg, LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + report("Non-PHI, non-early clobber def must be at a register slot", MBB); + report_context(LR, Reg, LaneMask); + report_context(*VNI); } } } void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, const LiveRange::const_iterator I, - unsigned Reg, unsigned LaneMask) { + unsigned Reg, LaneBitmask LaneMask) +{ const LiveRange::Segment &S = *I; const VNInfo *VNI = S.valno; assert(VNI && "Live segment has no valno"); if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) { - report("Foreign valno in live segment", MF, LR, Reg, LaneMask); - errs() << S << " has a bad valno\n"; + report("Foreign valno in live segment", MF); + report_context(LR, Reg, LaneMask); + report_context(S); + report_context(*VNI); } if (VNI->isUnused()) { - report("Live segment valno is marked unused", MF, LR, Reg, LaneMask); - errs() << S << '\n'; + report("Live segment valno is marked unused", MF); + report_context(LR, Reg, LaneMask); + report_context(S); } const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start); if (!MBB) { - report("Bad start of live segment, no basic block", MF, LR, Reg, LaneMask); - errs() << S << '\n'; + report("Bad start of live segment, no basic block", MF); + report_context(LR, Reg, LaneMask); + report_context(S); return; } SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); if (S.start != MBBStartIdx && S.start != VNI->def) { - report("Live segment must begin at MBB entry or valno def", MBB, LR, Reg, - LaneMask); - errs() << S << '\n'; + report("Live segment must begin at MBB entry or valno def", MBB); + report_context(LR, Reg, LaneMask); + report_context(S); } const MachineBasicBlock *EndMBB = LiveInts->getMBBFromIndex(S.end.getPrevSlot()); if (!EndMBB) { - report("Bad end of live segment, no basic block", MF, LR, Reg, LaneMask); - errs() << S << '\n'; + report("Bad end of live segment, no basic block", MF); + report_context(LR, Reg, LaneMask); + report_context(S); return; } @@ -1527,26 +1572,26 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, const MachineInstr *MI = LiveInts->getInstructionFromIndex(S.end.getPrevSlot()); if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB, LR, Reg, - LaneMask); - errs() << S << '\n'; + report("Live segment doesn't end at a valid instruction", EndMBB); + report_context(LR, Reg, LaneMask); + report_context(S); return; } // The block slot must refer to a basic block boundary. if (S.end.isBlock()) { - report("Live segment ends at B slot of an instruction", EndMBB, LR, Reg, - LaneMask); - errs() << S << '\n'; + report("Live segment ends at B slot of an instruction", EndMBB); + report_context(LR, Reg, LaneMask); + report_context(S); } if (S.end.isDead()) { // Segment ends on the dead slot. // That means there must be a dead def. 
if (!SlotIndex::isSameInstr(S.start, S.end)) { - report("Live segment ending at dead slot spans instructions", EndMBB, LR, - Reg, LaneMask); - errs() << S << '\n'; + report("Live segment ending at dead slot spans instructions", EndMBB); + report_context(LR, Reg, LaneMask); + report_context(S); } } @@ -1555,9 +1600,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (S.end.isEarlyClobber()) { if (I+1 == LR.end() || (I+1)->start != S.end) { report("Live segment ending at early clobber slot must be " - "redefined by an EC def in the same instruction", EndMBB, LR, Reg, - LaneMask); - errs() << S << '\n'; + "redefined by an EC def in the same instruction", EndMBB); + report_context(LR, Reg, LaneMask); + report_context(S); } } @@ -1587,14 +1632,15 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, !hasSubRegDef) { report("Instruction ending live segment doesn't read the register", MI); - errs() << S << " in " << LR << '\n'; + report_context(LR, Reg, LaneMask); + report_context(S); } } } } // Now check all the basic blocks in this live segment. - MachineFunction::const_iterator MFI = MBB; + MachineFunction::const_iterator MFI = MBB->getIterator(); // Is this live segment the beginning of a non-PHIDef VN? if (S.start == VNI->def && !VNI->isPHIDef()) { // Not live-in to any blocks. @@ -1604,10 +1650,10 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, ++MFI; } for (;;) { - assert(LiveInts->isLiveInToMBB(LR, MFI)); + assert(LiveInts->isLiveInToMBB(LR, &*MFI)); // We don't know how to track physregs into a landing pad. if (!TargetRegisterInfo::isVirtualRegister(Reg) && - MFI->isLandingPad()) { + MFI->isEHPad()) { if (&*MFI == EndMBB) break; ++MFI; @@ -1616,7 +1662,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // Is VNI a PHI-def in the current block? bool IsPHI = VNI->isPHIDef() && - VNI->def == LiveInts->getMBBStartIdx(MFI); + VNI->def == LiveInts->getMBBStartIdx(&*MFI); // Check that VNI is live-out of all predecessors. for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), @@ -1626,22 +1672,23 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // All predecessors must have a live-out value. if (!PVNI) { - report("Register not marked live out of predecessor", *PI, LR, Reg, - LaneMask); - errs() << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " - << PEnd << '\n'; + report("Register not marked live out of predecessor", *PI); + report_context(LR, Reg, LaneMask); + report_context(*VNI); + errs() << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(&*MFI) << ", not live before " + << PEnd << '\n'; continue; } // Only PHI-defs can take different predecessor values. 
if (!IsPHI && PVNI != VNI) { - report("Different value live out of predecessor", *PI, LR, Reg, - LaneMask); + report("Different value live out of predecessor", *PI); + report_context(LR, Reg, LaneMask); errs() << "Valno #" << PVNI->id << " live out of BB#" - << (*PI)->getNumber() << '@' << PEnd - << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << '\n'; + << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id + << " live into BB#" << MFI->getNumber() << '@' + << LiveInts->getMBBStartIdx(&*MFI) << '\n'; } } if (&*MFI == EndMBB) @@ -1651,7 +1698,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, } void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, - unsigned LaneMask) { + LaneBitmask LaneMask) { for (const VNInfo *VNI : LR.valnos) verifyLiveRangeValue(LR, VNI, Reg, LaneMask); @@ -1664,24 +1711,35 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { assert(TargetRegisterInfo::isVirtualRegister(Reg)); verifyLiveRange(LI, Reg); - unsigned Mask = 0; - unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg); + LaneBitmask Mask = 0; + LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg); for (const LiveInterval::SubRange &SR : LI.subranges()) { - if ((Mask & SR.LaneMask) != 0) - report("Lane masks of sub ranges overlap in live interval", MF, LI); - if ((SR.LaneMask & ~MaxMask) != 0) - report("Subrange lanemask is invalid", MF, LI); + if ((Mask & SR.LaneMask) != 0) { + report("Lane masks of sub ranges overlap in live interval", MF); + report_context(LI); + } + if ((SR.LaneMask & ~MaxMask) != 0) { + report("Subrange lanemask is invalid", MF); + report_context(LI); + } + if (SR.empty()) { + report("Subrange must not be empty", MF); + report_context(SR, LI.reg, SR.LaneMask); + } Mask |= SR.LaneMask; verifyLiveRange(SR, LI.reg, SR.LaneMask); - if (!LI.covers(SR)) - report("A Subrange is not covered by the main range", MF, LI); + if (!LI.covers(SR)) { + report("A Subrange is not covered by the main range", MF); + report_context(LI); + } } // Check the LI only has one connected component. ConnectedVNInfoEqClasses ConEQ(*LiveInts); unsigned NumComp = ConEQ.Classify(&LI); if (NumComp > 1) { - report("Multiple connected components in live interval", MF, LI); + report("Multiple connected components in live interval", MF); + report_context(LI); for (unsigned comp = 0; comp != NumComp; ++comp) { errs() << comp << ": valnos"; for (LiveInterval::const_vni_iterator I = LI.vni_begin(), diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp index d3433018004c..2c937926d0a7 100644 --- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp @@ -548,7 +548,7 @@ void PHIElimination::analyzePHINodes(const MachineFunction& MF) { bool PHIElimination::SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, MachineLoopInfo *MLI) { - if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) + if (MBB.empty() || !MBB.front().isPHI() || MBB.isEHPad()) return false; // Quick exit for basic blocks without PHIs. const MachineLoop *CurLoop = MLI ? 
MLI->getLoopFor(&MBB) : nullptr; diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp index 99bbad1cc280..4cabc3a8c1fd 100644 --- a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp +++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp @@ -28,7 +28,7 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, // Usually, we just want to insert the copy before the first terminator // instruction. However, for the edge going to a landing pad, we must insert // the copy before the call/invoke instruction. - if (!SuccMBB->isLandingPad()) + if (!SuccMBB->isEHPad()) return MBB->getFirstTerminator(); // Discover any defs/uses in this basic block. diff --git a/contrib/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm/lib/CodeGen/ParallelCG.cpp new file mode 100644 index 000000000000..e73ba0296045 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ParallelCG.cpp @@ -0,0 +1,96 @@ +//===-- ParallelCG.cpp ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines functions that can be used for parallel code generation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ParallelCG.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/thread.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/SplitModule.h" + +using namespace llvm; + +static void codegen(Module *M, llvm::raw_pwrite_stream &OS, + const Target *TheTarget, StringRef CPU, StringRef Features, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OL, + TargetMachine::CodeGenFileType FileType) { + std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine( + M->getTargetTriple(), CPU, Features, Options, RM, CM, OL)); + + legacy::PassManager CodeGenPasses; + if (TM->addPassesToEmitFile(CodeGenPasses, OS, FileType)) + report_fatal_error("Failed to setup codegen"); + CodeGenPasses.run(*M); +} + +std::unique_ptr<Module> +llvm::splitCodeGen(std::unique_ptr<Module> M, + ArrayRef<llvm::raw_pwrite_stream *> OSs, StringRef CPU, + StringRef Features, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, + TargetMachine::CodeGenFileType FileType) { + StringRef TripleStr = M->getTargetTriple(); + std::string ErrMsg; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg); + if (!TheTarget) + report_fatal_error(Twine("Target not found: ") + ErrMsg); + + if (OSs.size() == 1) { + codegen(M.get(), *OSs[0], TheTarget, CPU, Features, Options, RM, CM, + OL, FileType); + return M; + } + + std::vector<thread> Threads; + SplitModule(std::move(M), OSs.size(), [&](std::unique_ptr<Module> MPart) { + // We want to clone the module in a new context to multi-thread the codegen. + // We do it by serializing partition modules to bitcode (while still on the + // main thread, in order to avoid data races) and spinning up new threads + // which deserialize the partitions into separate contexts. 
+ // FIXME: Provide a more direct way to do this in LLVM. + SmallVector<char, 0> BC; + raw_svector_ostream BCOS(BC); + WriteBitcodeToFile(MPart.get(), BCOS); + + llvm::raw_pwrite_stream *ThreadOS = OSs[Threads.size()]; + Threads.emplace_back( + [TheTarget, CPU, Features, Options, RM, CM, OL, FileType, + ThreadOS](const SmallVector<char, 0> &BC) { + LLVMContext Ctx; + ErrorOr<std::unique_ptr<Module>> MOrErr = + parseBitcodeFile(MemoryBufferRef(StringRef(BC.data(), BC.size()), + "<split-module>"), + Ctx); + if (!MOrErr) + report_fatal_error("Failed to read bitcode"); + std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get()); + + codegen(MPartInCtx.get(), *ThreadOS, TheTarget, CPU, Features, + Options, RM, CM, OL, FileType); + }, + // Pass BC using std::move to ensure that it get moved rather than + // copied into the thread's context. + std::move(BC)); + }); + + for (thread &T : Threads) + T.join(); + + return {}; +} diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp index 024d166a4987..873f7125b82a 100644 --- a/contrib/llvm/lib/CodeGen/Passes.cpp +++ b/contrib/llvm/lib/CodeGen/Passes.cpp @@ -13,7 +13,11 @@ //===---------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CFLAliasAnalysis.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/IR/IRPrintingPasses.h" @@ -52,9 +56,6 @@ static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden, cl::desc("Disable Machine Common Subexpression Elimination")); -static cl::opt<cl::boolOrDefault> - EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, - cl::desc("enable the shrink-wrapping pass")); static cl::opt<cl::boolOrDefault> OptimizeRegAlloc( "optimize-regalloc", cl::Hidden, cl::desc("Enable optimized register allocation compilation path.")); @@ -95,10 +96,10 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, // Temporary option to allow experimenting with MachineScheduler as a post-RA // scheduler. Targets can "properly" enable this with -// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); Ideally it -// wouldn't be part of the standard pass pipeline, and the target would just add -// a PostRA scheduling pass wherever it wants. -static cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden, +// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID). +// Targets can return true in targetSchedulesPostRAScheduling() and +// insert a PostRA scheduling pass wherever it wants. +cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden, cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)")); // Experimental option to run live interval analysis early. 
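[Illustrative sketch, not part of this commit.] The new ParallelCG.cpp above exposes llvm::splitCodeGen(), which splits a module into as many partitions as there are output streams and compiles each partition on its own thread in a fresh LLVMContext. The following minimal caller is based only on the splitCodeGen() signature added above; the module setup, output file names, and target-initialization calls are assumptions made for the example, not code from this change. Passing a single stream takes the early-return path shown above and compiles on the calling thread.

// Hypothetical driver for the splitCodeGen() interface added above.
// Everything outside that signature (module contents, file names,
// target initialization, error handling) is assumed for illustration.
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/ParallelCG.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

int main() {
  // Make the host target available to TargetRegistry::lookupTarget().
  InitializeAllTargets();
  InitializeAllTargetMCs();
  InitializeAllAsmPrinters();

  LLVMContext Ctx;
  auto M = llvm::make_unique<Module>("split-me", Ctx);
  M->setTargetTriple(sys::getDefaultTargetTriple());

  // Two output streams: SplitModule() creates two partitions, each
  // serialized to bitcode and compiled on its own thread.
  std::error_code EC;
  raw_fd_ostream OS0("part0.o", EC, sys::fs::F_None);
  raw_fd_ostream OS1("part1.o", EC, sys::fs::F_None);

  splitCodeGen(std::move(M), {&OS0, &OS1}, /*CPU=*/"", /*Features=*/"",
               TargetOptions(), Reloc::Default, CodeModel::Default,
               CodeGenOpt::Default, TargetMachine::CGFT_ObjectFile);
  return 0;
}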
@@ -188,6 +189,29 @@ char TargetPassConfig::ID = 0; char TargetPassConfig::EarlyTailDuplicateID = 0; char TargetPassConfig::PostRAMachineLICMID = 0; +namespace { +struct InsertedPass { + AnalysisID TargetPassID; + IdentifyingPassPtr InsertedPassID; + bool VerifyAfter; + bool PrintAfter; + + InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID, + bool VerifyAfter, bool PrintAfter) + : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID), + VerifyAfter(VerifyAfter), PrintAfter(PrintAfter) {} + + Pass *getInsertedPass() const { + assert(InsertedPassID.isValid() && "Illegal Pass ID!"); + if (InsertedPassID.isInstance()) + return InsertedPassID.getInstance(); + Pass *NP = Pass::createPass(InsertedPassID.getID()); + assert(NP && "Pass ID not registered"); + return NP; + } +}; +} + namespace llvm { class PassConfigImpl { public: @@ -202,7 +226,7 @@ public: /// Store the pairs of <AnalysisID, AnalysisID> of which the second pass /// is inserted after each instance of the first one. - SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4> InsertedPasses; + SmallVector<InsertedPass, 4> InsertedPasses; }; } // namespace llvm @@ -217,7 +241,7 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) : ImmutablePass(ID), PM(&pm), StartBefore(nullptr), StartAfter(nullptr), StopAfter(nullptr), Started(true), Stopped(false), AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false), - DisableVerify(false), EnableTailMerge(true), EnableShrinkWrap(false) { + DisableVerify(false), EnableTailMerge(true) { Impl = new PassConfigImpl(); @@ -225,6 +249,10 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) // including this pass itself. initializeCodeGen(*PassRegistry::getPassRegistry()); + // Also register alias analysis passes required by codegen passes. + initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry()); + initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); + // Substitute Pseudo Pass IDs for real ones. substitutePass(&EarlyTailDuplicateID, &TailDuplicateID); substitutePass(&PostRAMachineLICMID, &MachineLICMID); @@ -232,14 +260,15 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) /// Insert InsertedPassID pass after TargetPassID. void TargetPassConfig::insertPass(AnalysisID TargetPassID, - IdentifyingPassPtr InsertedPassID) { + IdentifyingPassPtr InsertedPassID, + bool VerifyAfter, bool PrintAfter) { assert(((!InsertedPassID.isInstance() && TargetPassID != InsertedPassID.getID()) || (InsertedPassID.isInstance() && TargetPassID != InsertedPassID.getInstance()->getPassID())) && "Insert a pass after itself!"); - std::pair<AnalysisID, IdentifyingPassPtr> P(TargetPassID, InsertedPassID); - Impl->InsertedPasses.push_back(P); + Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter, + PrintAfter); } /// createPassConfig - Create a pass configuration object to be used by @@ -304,21 +333,9 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) { } // Add the passes after the pass P if there is any. 
- for (SmallVectorImpl<std::pair<AnalysisID, IdentifyingPassPtr> >::iterator - I = Impl->InsertedPasses.begin(), - E = Impl->InsertedPasses.end(); - I != E; ++I) { - if ((*I).first == PassID) { - assert((*I).second.isValid() && "Illegal Pass ID!"); - Pass *NP; - if ((*I).second.isInstance()) - NP = (*I).second.getInstance(); - else { - NP = Pass::createPass((*I).second.getID()); - assert(NP && "Pass ID not registered"); - } - addPass(NP, false, false); - } + for (auto IP : Impl->InsertedPasses) { + if (IP.TargetPassID == PassID) + addPass(IP.getInsertedPass(), IP.VerifyAfter, IP.PrintAfter); } } else { delete P; @@ -380,10 +397,10 @@ void TargetPassConfig::addIRPasses() { // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. if (UseCFLAA) - addPass(createCFLAliasAnalysisPass()); - addPass(createTypeBasedAliasAnalysisPass()); - addPass(createScopedNoAliasAAPass()); - addPass(createBasicAliasAnalysisPass()); + addPass(createCFLAAWrapperPass()); + addPass(createTypeBasedAAWrapperPass()); + addPass(createScopedNoAliasAAWrapperPass()); + addPass(createBasicAAWrapperPass()); // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. @@ -461,7 +478,7 @@ void TargetPassConfig::addISelPrepare() { // Add both the safe stack and the stack protection passes: each of them will // only protect functions that have corresponding attributes. - addPass(createSafeStackPass()); + addPass(createSafeStackPass(TM)); addPass(createStackProtectorPass(TM)); if (PrintISelInput) @@ -539,8 +556,9 @@ void TargetPassConfig::addMachinePasses() { addPostRegAlloc(); // Insert prolog/epilog code. Eliminate abstract frame index references... - if (getEnableShrinkWrap()) + if (getOptLevel() != CodeGenOpt::None) addPass(&ShrinkWrapID); + addPass(&PrologEpilogCodeInserterID); /// Add passes that optimize machine instructions after register allocation. @@ -557,7 +575,10 @@ void TargetPassConfig::addMachinePasses() { addPass(&ImplicitNullChecksID); // Second pass scheduler. - if (getOptLevel() != CodeGenOpt::None) { + // Let Target optionally insert this pass by itself at some other + // point. + if (getOptLevel() != CodeGenOpt::None && + !TM->targetSchedulesPostRAScheduling()) { if (MISchedPostRA) addPass(&PostMachineSchedulerID); else @@ -576,7 +597,10 @@ void TargetPassConfig::addMachinePasses() { addPreEmitPass(); + addPass(&FuncletLayoutID, false); + addPass(&StackMapLivenessID, false); + addPass(&LiveDebugValuesID, false); AddingMachinePasses = false; } @@ -613,27 +637,12 @@ void TargetPassConfig::addMachineSSAOptimization() { addPass(&MachineCSEID, false); addPass(&MachineSinkingID); - addPass(&PeepholeOptimizerID, false); + addPass(&PeepholeOptimizerID); // Clean-up the dead code that may have been generated by peephole // rewriting. addPass(&DeadMachineInstructionElimID); } -bool TargetPassConfig::getEnableShrinkWrap() const { - switch (EnableShrinkWrapOpt) { - case cl::BOU_UNSET: - return EnableShrinkWrap && getOptLevel() != CodeGenOpt::None; - // If EnableShrinkWrap is set, it takes precedence on whatever the - // target sets. The rational is that we assume we want to test - // something related to shrink-wrapping. 
- case cl::BOU_TRUE: - return true; - case cl::BOU_FALSE: - return false; - } - llvm_unreachable("Invalid shrink-wrapping state"); -} - //===---------------------------------------------------------------------===// /// Register Allocation Pass Configuration //===---------------------------------------------------------------------===// @@ -717,7 +726,8 @@ void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { addPass(&PHIEliminationID, false); addPass(&TwoAddressInstructionPassID, false); - addPass(RegAllocPass); + if (RegAllocPass) + addPass(RegAllocPass); } /// Add standard target-independent passes that are tightly coupled with @@ -748,25 +758,27 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { // PreRA instruction scheduling. addPass(&MachineSchedulerID); - // Add the selected register allocation pass. - addPass(RegAllocPass); + if (RegAllocPass) { + // Add the selected register allocation pass. + addPass(RegAllocPass); - // Allow targets to change the register assignments before rewriting. - addPreRewrite(); + // Allow targets to change the register assignments before rewriting. + addPreRewrite(); - // Finally rewrite virtual registers. - addPass(&VirtRegRewriterID); + // Finally rewrite virtual registers. + addPass(&VirtRegRewriterID); - // Perform stack slot coloring and post-ra machine LICM. - // - // FIXME: Re-enable coloring with register when it's capable of adding - // kill markers. - addPass(&StackSlotColoringID); + // Perform stack slot coloring and post-ra machine LICM. + // + // FIXME: Re-enable coloring with register when it's capable of adding + // kill markers. + addPass(&StackSlotColoringID); - // Run post-ra machine LICM to hoist reloads / remats. - // - // FIXME: can this move into MachineLateOptimization? - addPass(&PostRAMachineLICMID); + // Run post-ra machine LICM to hoist reloads / remats. + // + // FIXME: can this move into MachineLateOptimization? + addPass(&PostRAMachineLICMID); + } } //===---------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp index ebe05e3f2731..52b42b624ee1 100644 --- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -43,7 +43,7 @@ // - Optimize Loads: // // Loads that can be folded into a later instruction. A load is foldable -// if it loads to virtual registers and the virtual register defined has +// if it loads to virtual registers and the virtual register defined has // a single use. // // - Optimize Copies and Bitcast (more generally, target specific copies): @@ -98,6 +98,16 @@ static cl::opt<bool> DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization")); +static cl::opt<bool> DisableNAPhysCopyOpt( + "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false), + cl::desc("Disable non-allocatable physical register copy optimization")); + +// Limit the number of PHI instructions to process +// in PeepholeOptimizer::getNextSource. 
+static cl::opt<unsigned> RewritePHILimit( + "rewrite-phi-limit", cl::Hidden, cl::init(10), + cl::desc("Limit the length of PHI chains to lookup")); + STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); @@ -105,8 +115,11 @@ STATISTIC(NumLoadFold, "Number of loads folded"); STATISTIC(NumSelects, "Number of selects optimized"); STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); +STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed"); namespace { + class ValueTrackerResult; + class PeepholeOptimizer : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -130,6 +143,10 @@ namespace { } } + /// \brief Track Def -> Use info used for rewriting copies. + typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult> + RewriteMapTy; + private: bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, @@ -137,17 +154,38 @@ namespace { bool optimizeSelect(MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs); bool optimizeCondBranch(MachineInstr *MI); - bool optimizeCopyOrBitcast(MachineInstr *MI); bool optimizeCoalescableCopy(MachineInstr *MI); bool optimizeUncoalescableCopy(MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs); - bool findNextSource(unsigned &Reg, unsigned &SubReg); + bool findNextSource(unsigned Reg, unsigned SubReg, + RewriteMapTy &RewriteMap); bool isMoveImmediate(MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs); bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs); + + /// \brief If copy instruction \p MI is a virtual register copy, track it in + /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was + /// previously seen as a copy, replace the uses of this copy with the + /// previously seen copy's destination register. + bool foldRedundantCopy(MachineInstr *MI, + SmallSet<unsigned, 4> &CopySrcRegs, + DenseMap<unsigned, MachineInstr *> &CopyMIs); + + /// \brief Is the register \p Reg a non-allocatable physical register? + bool isNAPhysCopy(unsigned Reg); + + /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical + /// register copy, track it in the \p NAPhysToVirtMIs map. If this + /// non-allocatable physical register was previously copied to a virtual + /// registered and hasn't been clobbered, the virt->phys copy can be + /// deleted. + bool foldRedundantNAPhysCopy( + MachineInstr *MI, + DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs); + bool isLoadFoldable(MachineInstr *MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); @@ -171,6 +209,69 @@ namespace { } }; + /// \brief Helper class to hold a reply for ValueTracker queries. Contains the + /// returned sources for a given search and the instructions where the sources + /// were tracked from. + class ValueTrackerResult { + private: + /// Track all sources found by one ValueTracker query. + SmallVector<TargetInstrInfo::RegSubRegPair, 2> RegSrcs; + + /// Instruction using the sources in 'RegSrcs'. 
+ const MachineInstr *Inst; + + public: + ValueTrackerResult() : Inst(nullptr) {} + ValueTrackerResult(unsigned Reg, unsigned SubReg) : Inst(nullptr) { + addSource(Reg, SubReg); + } + + bool isValid() const { return getNumSources() > 0; } + + void setInst(const MachineInstr *I) { Inst = I; } + const MachineInstr *getInst() const { return Inst; } + + void clear() { + RegSrcs.clear(); + Inst = nullptr; + } + + void addSource(unsigned SrcReg, unsigned SrcSubReg) { + RegSrcs.push_back(TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg)); + } + + void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) { + assert(Idx < getNumSources() && "Reg pair source out of index"); + RegSrcs[Idx] = TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg); + } + + int getNumSources() const { return RegSrcs.size(); } + + unsigned getSrcReg(int Idx) const { + assert(Idx < getNumSources() && "Reg source out of index"); + return RegSrcs[Idx].Reg; + } + + unsigned getSrcSubReg(int Idx) const { + assert(Idx < getNumSources() && "SubReg source out of index"); + return RegSrcs[Idx].SubReg; + } + + bool operator==(const ValueTrackerResult &Other) { + if (Other.getInst() != getInst()) + return false; + + if (Other.getNumSources() != getNumSources()) + return false; + + for (int i = 0, e = Other.getNumSources(); i != e; ++i) + if (Other.getSrcReg(i) != getSrcReg(i) || + Other.getSrcSubReg(i) != getSrcSubReg(i)) + return false; + return true; + } + }; + /// \brief Helper class to track the possible sources of a value defined by /// a (chain of) copy related instructions. /// Given a definition (instruction and definition index), this class @@ -213,23 +314,25 @@ namespace { /// \brief Dispatcher to the right underlying implementation of /// getNextSource. - bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceImpl(); /// \brief Specialized version of getNextSource for Copy instructions. - bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromCopy(); /// \brief Specialized version of getNextSource for Bitcast instructions. - bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromBitcast(); /// \brief Specialized version of getNextSource for RegSequence /// instructions. - bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromRegSequence(); /// \brief Specialized version of getNextSource for InsertSubreg /// instructions. - bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromInsertSubreg(); /// \brief Specialized version of getNextSource for ExtractSubreg /// instructions. - bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromExtractSubreg(); /// \brief Specialized version of getNextSource for SubregToReg /// instructions. - bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromSubregToReg(); + /// \brief Specialized version of getNextSource for PHI instructions. + ValueTrackerResult getNextSourceFromPHI(); public: /// \brief Create a ValueTracker instance for the value defined by \p Reg. @@ -276,16 +379,10 @@ namespace { /// \brief Following the use-def chain, get the next available source /// for the tracked value. - /// When the returned value is not nullptr, \p SrcReg gives the register - /// that contain the tracked value. 
- /// \note The sub register index returned in \p SrcSubReg must be used - /// on \p SrcReg to access the actual value. - /// \return Unless the returned value is nullptr (i.e., no source found), - /// \p SrcReg gives the register of the next source used in the returned - /// instruction and \p SrcSubReg the sub-register index to be used on that - /// source to get the tracked value. When nullptr is returned, no - /// alternative source has been found. - const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg); + /// \return A ValueTrackerResult containing a set of registers + /// and sub registers with tracked values. A ValueTrackerResult with + /// an empty set of registers means no source was found. + ValueTrackerResult getNextSource(); /// \brief Get the last register where the initial value can be found. /// Initially this is the register of the definition. @@ -303,11 +400,10 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", "Peephole Optimizations", false, false) -/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads -/// a single register and writes a single register and it does not modify the -/// source, and if the source value is preserved as a sub-register of the -/// result, then replace all reachable uses of the source with the subreg of the -/// result. +/// If instruction is a copy-like instruction, i.e. it reads a single register +/// and writes a single register and it does not modify the source, and if the +/// source value is preserved as a sub-register of the result, then replace all +/// reachable uses of the source with the subreg of the result. /// /// Do not generate an EXTRACT that is used only in a debug use, as this changes /// the code. Since this code does not currently share EXTRACTs, just ignore all @@ -458,10 +554,10 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, return Changed; } -/// optimizeCmpInstr - If the instruction is a compare and the previous -/// instruction it's comparing against all ready sets (or could be modified to -/// set) the same flag as the compare, then we can remove the comparison and use -/// the flag from the previous instruction. +/// If the instruction is a compare and the previous instruction it's comparing +/// against already sets (or could be modified to set) the same flag as the +/// compare, then we can remove the comparison and use the flag from the +/// previous instruction. bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB) { // If this instruction is a comparison against zero and isn't comparing a @@ -506,88 +602,138 @@ bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) { return TII->optimizeCondBranch(MI); } -/// \brief Check if the registers defined by the pair (RegisterClass, SubReg) -/// share the same register file. -static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, - const TargetRegisterClass *DefRC, - unsigned DefSubReg, - const TargetRegisterClass *SrcRC, - unsigned SrcSubReg) { - // Same register class. - if (DefRC == SrcRC) - return true; - - // Both operands are sub registers. Check if they share a register class. - unsigned SrcIdx, DefIdx; - if (SrcSubReg && DefSubReg) - return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, - SrcIdx, DefIdx) != nullptr; - // At most one of the register is a sub register, make it Src to avoid - // duplicating the test. 
- if (!SrcSubReg) { - std::swap(DefSubReg, SrcSubReg); - std::swap(DefRC, SrcRC); - } - - // One of the register is a sub register, check if we can get a superclass. - if (SrcSubReg) - return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr; - // Plain copy. - return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; -} - /// \brief Try to find the next source that share the same register file /// for the value defined by \p Reg and \p SubReg. -/// When true is returned, \p Reg and \p SubReg are updated with the -/// register number and sub-register index of the new source. +/// When true is returned, the \p RewriteMap can be used by the client to +/// retrieve all Def -> Use along the way up to the next source. Any found +/// Use that is not itself a key for another entry, is the next source to +/// use. During the search for the next source, multiple sources can be found +/// given multiple incoming sources of a PHI instruction. In this case, we +/// look in each PHI source for the next source; all found next sources must +/// share the same register file as \p Reg and \p SubReg. The client should +/// then be capable to rewrite all intermediate PHIs to get the next source. /// \return False if no alternative sources are available. True otherwise. -bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) { +bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg, + RewriteMapTy &RewriteMap) { // Do not try to find a new source for a physical register. // So far we do not have any motivating example for doing that. // Thus, instead of maintaining untested code, we will revisit that if // that changes at some point. if (TargetRegisterInfo::isPhysicalRegister(Reg)) return false; - const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); - unsigned DefSubReg = SubReg; - - unsigned Src; - unsigned SrcSubReg; - bool ShouldRewrite = false; - - // Follow the chain of copies until we reach the top of the use-def chain - // or find a more suitable source. - ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII); - do { - unsigned CopySrcReg, CopySrcSubReg; - if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg)) - break; - Src = CopySrcReg; - SrcSubReg = CopySrcSubReg; - - // Do not extend the live-ranges of physical registers as they add - // constraints to the register allocator. - // Moreover, if we want to extend the live-range of a physical register, - // unlike SSA virtual register, we will have to check that they are not - // redefine before the related use. - if (TargetRegisterInfo::isPhysicalRegister(Src)) - break; - const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); + SmallVector<TargetInstrInfo::RegSubRegPair, 4> SrcToLook; + TargetInstrInfo::RegSubRegPair CurSrcPair(Reg, SubReg); + SrcToLook.push_back(CurSrcPair); + + unsigned PHICount = 0; + while (!SrcToLook.empty() && PHICount < RewritePHILimit) { + TargetInstrInfo::RegSubRegPair Pair = SrcToLook.pop_back_val(); + // As explained above, do not handle physical registers + if (TargetRegisterInfo::isPhysicalRegister(Pair.Reg)) + return false; - // If this source does not incur a cross register bank copy, use it. 
- ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC, - SrcSubReg); - } while (!ShouldRewrite); + CurSrcPair = Pair; + ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, + !DisableAdvCopyOpt, TII); + ValueTrackerResult Res; + bool ShouldRewrite = false; + + do { + // Follow the chain of copies until we reach the top of the use-def chain + // or find a more suitable source. + Res = ValTracker.getNextSource(); + if (!Res.isValid()) + break; + + // Insert the Def -> Use entry for the recently found source. + ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair); + if (CurSrcRes.isValid()) { + assert(CurSrcRes == Res && "ValueTrackerResult found must match"); + // An existent entry with multiple sources is a PHI cycle we must avoid. + // Otherwise it's an entry with a valid next source we already found. + if (CurSrcRes.getNumSources() > 1) { + DEBUG(dbgs() << "findNextSource: found PHI cycle, aborting...\n"); + return false; + } + break; + } + RewriteMap.insert(std::make_pair(CurSrcPair, Res)); + + // ValueTrackerResult usually have one source unless it's the result from + // a PHI instruction. Add the found PHI edges to be looked up further. + unsigned NumSrcs = Res.getNumSources(); + if (NumSrcs > 1) { + PHICount++; + for (unsigned i = 0; i < NumSrcs; ++i) + SrcToLook.push_back(TargetInstrInfo::RegSubRegPair( + Res.getSrcReg(i), Res.getSrcSubReg(i))); + break; + } - // If we did not find a more suitable source, there is nothing to optimize. - if (!ShouldRewrite || Src == Reg) + CurSrcPair.Reg = Res.getSrcReg(0); + CurSrcPair.SubReg = Res.getSrcSubReg(0); + // Do not extend the live-ranges of physical registers as they add + // constraints to the register allocator. Moreover, if we want to extend + // the live-range of a physical register, unlike SSA virtual register, + // we will have to check that they aren't redefine before the related use. + if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg)) + return false; + + const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg); + ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC, + CurSrcPair.SubReg); + } while (!ShouldRewrite); + + // Continue looking for new sources... + if (Res.isValid()) + continue; + + // Do not continue searching for a new source if the there's at least + // one use-def which cannot be rewritten. + if (!ShouldRewrite) + return false; + } + + if (PHICount >= RewritePHILimit) { + DEBUG(dbgs() << "findNextSource: PHI limit reached\n"); return false; + } - Reg = Src; - SubReg = SrcSubReg; - return true; + // If we did not find a more suitable source, there is nothing to optimize. + return CurSrcPair.Reg != Reg; +} + +/// \brief Insert a PHI instruction with incoming edges \p SrcRegs that are +/// guaranteed to have the same register class. This is necessary whenever we +/// successfully traverse a PHI instruction and find suitable sources coming +/// from its edges. By inserting a new PHI, we provide a rewritten PHI def +/// suitable to be used in a new COPY instruction. 
+static MachineInstr * +insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, + const SmallVectorImpl<TargetInstrInfo::RegSubRegPair> &SrcRegs, + MachineInstr *OrigPHI) { + assert(!SrcRegs.empty() && "No sources to create a PHI instruction?"); + + const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg); + unsigned NewVR = MRI->createVirtualRegister(NewRC); + MachineBasicBlock *MBB = OrigPHI->getParent(); + MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(), + TII->get(TargetOpcode::PHI), NewVR); + + unsigned MBBOpIdx = 2; + for (auto RegPair : SrcRegs) { + MIB.addReg(RegPair.Reg, 0, RegPair.SubReg); + MIB.addMBB(OrigPHI->getOperand(MBBOpIdx).getMBB()); + // Since we're extended the lifetime of RegPair.Reg, clear the + // kill flags to account for that and make RegPair.Reg reaches + // the new PHI. + MRI->clearKillFlags(RegPair.Reg); + MBBOpIdx += 2; + } + + return MIB; } namespace { @@ -624,7 +770,7 @@ public: /// This source defines the whole definition, i.e., /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). /// - /// The second and subsequent calls will return false, has there is only one + /// The second and subsequent calls will return false, as there is only one /// rewritable source. /// /// \return True if a rewritable source has been found, false otherwise. @@ -632,9 +778,9 @@ public: virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, unsigned &TrackReg, unsigned &TrackSubReg) { - // If CurrentSrcIdx == 1, this means this function has already been - // called once. CopyLike has one defintiion and one argument, thus, - // there is nothing else to rewrite. + // If CurrentSrcIdx == 1, this means this function has already been called + // once. CopyLike has one definition and one argument, thus, there is + // nothing else to rewrite. if (!CopyLike.isCopy() || CurrentSrcIdx == 1) return false; // This is the first call to getNextRewritableSource. @@ -653,7 +799,7 @@ public: /// \brief Rewrite the current source with \p NewReg and \p NewSubReg /// if possible. - /// \return True if the rewritting was possible, false otherwise. + /// \return True if the rewriting was possible, false otherwise. virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { if (!CopyLike.isCopy() || CurrentSrcIdx != 1) return false; @@ -662,6 +808,157 @@ public: MOSrc.setSubReg(NewSubReg); return true; } + + /// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find + /// the new source to use for rewrite. If \p HandleMultipleSources is true and + /// multiple sources for a given \p Def are found along the way, we found a + /// PHI instructions that needs to be rewritten. + /// TODO: HandleMultipleSources should be removed once we test PHI handling + /// with coalescable copies. + TargetInstrInfo::RegSubRegPair + getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, + TargetInstrInfo::RegSubRegPair Def, + PeepholeOptimizer::RewriteMapTy &RewriteMap, + bool HandleMultipleSources = true) { + + TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg); + do { + ValueTrackerResult Res = RewriteMap.lookup(LookupSrc); + // If there are no entries on the map, LookupSrc is the new source. + if (!Res.isValid()) + return LookupSrc; + + // There's only one source for this definition, keep searching... 
+ unsigned NumSrcs = Res.getNumSources(); + if (NumSrcs == 1) { + LookupSrc.Reg = Res.getSrcReg(0); + LookupSrc.SubReg = Res.getSrcSubReg(0); + continue; + } + + // TODO: Remove once multiple srcs w/ coalescable copies are supported. + if (!HandleMultipleSources) + break; + + // Multiple sources, recurse into each source to find a new source + // for it. Then, rewrite the PHI accordingly to its new edges. + SmallVector<TargetInstrInfo::RegSubRegPair, 4> NewPHISrcs; + for (unsigned i = 0; i < NumSrcs; ++i) { + TargetInstrInfo::RegSubRegPair PHISrc(Res.getSrcReg(i), + Res.getSrcSubReg(i)); + NewPHISrcs.push_back( + getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources)); + } + + // Build the new PHI node and return its def register as the new source. + MachineInstr *OrigPHI = const_cast<MachineInstr *>(Res.getInst()); + MachineInstr *NewPHI = insertPHI(MRI, TII, NewPHISrcs, OrigPHI); + DEBUG(dbgs() << "-- getNewSource\n"); + DEBUG(dbgs() << " Replacing: " << *OrigPHI); + DEBUG(dbgs() << " With: " << *NewPHI); + const MachineOperand &MODef = NewPHI->getOperand(0); + return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg()); + + } while (1); + + return TargetInstrInfo::RegSubRegPair(0, 0); + } + + /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap + /// and create a new COPY instruction. More info about RewriteMap in + /// PeepholeOptimizer::findNextSource. Right now this is only used to handle + /// Uncoalescable copies, since they are copy like instructions that aren't + /// recognized by the register allocator. + virtual MachineInstr * + RewriteSource(TargetInstrInfo::RegSubRegPair Def, + PeepholeOptimizer::RewriteMapTy &RewriteMap) { + return nullptr; + } +}; + +/// \brief Helper class to rewrite uncoalescable copy like instructions +/// into new COPY (coalescable friendly) instructions. +class UncoalescableRewriter : public CopyRewriter { +protected: + const TargetInstrInfo &TII; + MachineRegisterInfo &MRI; + /// The number of defs in the bitcast + unsigned NumDefs; + +public: + UncoalescableRewriter(MachineInstr &MI, const TargetInstrInfo &TII, + MachineRegisterInfo &MRI) + : CopyRewriter(MI), TII(TII), MRI(MRI) { + NumDefs = MI.getDesc().getNumDefs(); + } + + /// \brief Get the next rewritable def source (TrackReg, TrackSubReg) + /// All such sources need to be considered rewritable in order to + /// rewrite a uncoalescable copy-like instruction. This method return + /// each definition that must be checked if rewritable. + /// + bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, + unsigned &TrackReg, + unsigned &TrackSubReg) override { + // Find the next non-dead definition and continue from there. + if (CurrentSrcIdx == NumDefs) + return false; + + while (CopyLike.getOperand(CurrentSrcIdx).isDead()) { + ++CurrentSrcIdx; + if (CurrentSrcIdx == NumDefs) + return false; + } + + // What we track are the alternative sources of the definition. + const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx); + TrackReg = MODef.getReg(); + TrackSubReg = MODef.getSubReg(); + + CurrentSrcIdx++; + return true; + } + + /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap + /// and create a new COPY instruction. More info about RewriteMap in + /// PeepholeOptimizer::findNextSource. Right now this is only used to handle + /// Uncoalescable copies, since they are copy like instructions that aren't + /// recognized by the register allocator. 
+ MachineInstr * + RewriteSource(TargetInstrInfo::RegSubRegPair Def, + PeepholeOptimizer::RewriteMapTy &RewriteMap) override { + assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && + "We do not rewrite physical registers"); + + // Find the new source to use in the COPY rewrite. + TargetInstrInfo::RegSubRegPair NewSrc = + getNewSource(&MRI, &TII, Def, RewriteMap); + + // Insert the COPY. + const TargetRegisterClass *DefRC = MRI.getRegClass(Def.Reg); + unsigned NewVR = MRI.createVirtualRegister(DefRC); + + MachineInstr *NewCopy = + BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), + TII.get(TargetOpcode::COPY), NewVR) + .addReg(NewSrc.Reg, 0, NewSrc.SubReg); + + NewCopy->getOperand(0).setSubReg(Def.SubReg); + if (Def.SubReg) + NewCopy->getOperand(0).setIsUndef(); + + DEBUG(dbgs() << "-- RewriteSource\n"); + DEBUG(dbgs() << " Replacing: " << CopyLike); + DEBUG(dbgs() << " With: " << *NewCopy); + MRI.replaceRegWith(Def.Reg, NewVR); + MRI.clearKillFlags(NewVR); + + // We extended the lifetime of NewSrc.Reg, clear the kill flags to + // account for that. + MRI.clearKillFlags(NewSrc.Reg); + + return NewCopy; + } }; /// \brief Specialized rewriter for INSERT_SUBREG instruction. @@ -699,7 +996,7 @@ public: // partial definition. TrackReg = MODef.getReg(); if (MODef.getSubReg()) - // Bails if we have to compose sub-register indices. + // Bail if we have to compose sub-register indices. return false; TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); return true; @@ -740,7 +1037,7 @@ public: CurrentSrcIdx = 1; const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); SrcReg = MOExtractedReg.getReg(); - // If we have to compose sub-register indices, bails out. + // If we have to compose sub-register indices, bail out. if (MOExtractedReg.getSubReg()) return false; @@ -818,7 +1115,7 @@ public: } const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); SrcReg = MOInsertedReg.getReg(); - // If we have to compose sub-register indices, bails out. + // If we have to compose sub-register indices, bail out. if ((SrcSubReg = MOInsertedReg.getSubReg())) return false; @@ -828,7 +1125,7 @@ public: const MachineOperand &MODef = CopyLike.getOperand(0); TrackReg = MODef.getReg(); - // If we have to compose sub-registers, bails. + // If we have to compose sub-registers, bail. return MODef.getSubReg() == 0; } @@ -850,7 +1147,13 @@ public: /// \return A pointer to a dynamically allocated CopyRewriter or nullptr /// if no rewriter works for \p MI. static CopyRewriter *getCopyRewriter(MachineInstr &MI, - const TargetInstrInfo &TII) { + const TargetInstrInfo &TII, + MachineRegisterInfo &MRI) { + // Handle uncoalescable copy-like instructions. + if (MI.isBitcast() || (MI.isRegSequenceLike() || MI.isInsertSubregLike() || + MI.isExtractSubregLike())) + return new UncoalescableRewriter(MI, TII, MRI); + switch (MI.getOpcode()) { default: return nullptr; @@ -874,7 +1177,7 @@ static CopyRewriter *getCopyRewriter(MachineInstr &MI, /// the same register bank. /// New copies issued by this optimization are register allocator /// friendly. This optimization does not remove any copy as it may -/// overconstraint the register allocator, but replaces some operands +/// overconstrain the register allocator, but replaces some operands /// when possible. /// \pre isCoalescableCopy(*MI) is true. /// \return True, when \p MI has been rewritten. False otherwise. 
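[Illustrative sketch, not part of this commit.] To make the Def -> Use bookkeeping above easier to follow, here is a small stand-alone model of the RewriteMap walk that getNewSource() performs. The types below only stand in for TargetInstrInfo::RegSubRegPair and ValueTrackerResult; they are not the LLVM classes. The walk follows map entries until it reaches a pair that is not itself a key (that pair is the next source), and bails out on a multi-source entry, mirroring the HandleMultipleSources == false path above.

// Simplified model of the RewriteMap chain walk described above.
#include <cstdio>
#include <map>
#include <utility>
#include <vector>

using RegSubReg = std::pair<unsigned, unsigned>; // (Reg, SubReg)

struct TrackerResult {
  std::vector<RegSubReg> Srcs; // one entry normally, several for a PHI
};

// Follow Def -> Use entries until a pair that is not itself a key; that
// pair is the new source. A multi-source entry models a PHI and makes
// this simplified walker give up, returning the (0, 0) sentinel.
static RegSubReg getNewSource(const std::map<RegSubReg, TrackerResult> &RewriteMap,
                              RegSubReg Def) {
  RegSubReg Cur = Def;
  for (;;) {
    auto It = RewriteMap.find(Cur);
    if (It == RewriteMap.end())
      return Cur;                 // no entry: Cur is the next source to use
    if (It->second.Srcs.size() != 1)
      return {0, 0};              // PHI entry: bail out in this model
    Cur = It->second.Srcs.front();
  }
}

int main() {
  // vreg3 <- COPY vreg2 <- COPY vreg1: rewriting vreg3 should reach vreg1.
  std::map<RegSubReg, TrackerResult> RewriteMap;
  RewriteMap[{3, 0}] = {{{2, 0}}};
  RewriteMap[{2, 0}] = {{{1, 0}}};
  RegSubReg Src = getNewSource(RewriteMap, {3, 0});
  std::printf("new source: vreg%u:%u\n", Src.first, Src.second); // vreg1:0
  return 0;
}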
@@ -889,25 +1192,33 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { bool Changed = false; // Get the right rewriter for the current copy. - std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII)); - // If none exists, bails out. + std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); + // If none exists, bail out. if (!CpyRewriter) return false; // Rewrite each rewritable source. unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, TrackSubReg)) { - unsigned NewSrc = TrackReg; - unsigned NewSubReg = TrackSubReg; - // Try to find a more suitable source. - // If we failed to do so, or get the actual source, - // move to the next source. - if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc) + // Keep track of PHI nodes and its incoming edges when looking for sources. + RewriteMapTy RewriteMap; + // Try to find a more suitable source. If we failed to do so, or get the + // actual source, move to the next source. + if (!findNextSource(TrackReg, TrackSubReg, RewriteMap)) + continue; + + // Get the new source to rewrite. TODO: Only enable handling of multiple + // sources (PHIs) once we have a motivating example and testcases for it. + TargetInstrInfo::RegSubRegPair TrackPair(TrackReg, TrackSubReg); + TargetInstrInfo::RegSubRegPair NewSrc = CpyRewriter->getNewSource( + MRI, TII, TrackPair, RewriteMap, false /* multiple sources */); + if (SrcReg == NewSrc.Reg || NewSrc.Reg == 0) continue; + // Rewrite source. - if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) { + if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) { // We may have extended the live-range of NewSrc, account for that. - MRI->clearKillFlags(NewSrc); + MRI->clearKillFlags(NewSrc.Reg); Changed = true; } } @@ -936,61 +1247,53 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy( assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); // Check if we can rewrite all the values defined by this instruction. - SmallVector< - std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>, - 4> RewritePairs; - for (const MachineOperand &MODef : MI->defs()) { - if (MODef.isDead()) - // We can ignore those. - continue; + SmallVector<TargetInstrInfo::RegSubRegPair, 4> RewritePairs; + // Get the right rewriter for the current copy. + std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); + // If none exists, bail out. + if (!CpyRewriter) + return false; + // Rewrite each rewritable source by generating new COPYs. This works + // differently from optimizeCoalescableCopy since it first makes sure that all + // definitions can be rewritten. + RewriteMapTy RewriteMap; + unsigned Reg, SubReg, CopyDefReg, CopyDefSubReg; + while (CpyRewriter->getNextRewritableSource(Reg, SubReg, CopyDefReg, + CopyDefSubReg)) { // If a physical register is here, this is probably for a good reason. // Do not rewrite that. - if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) + if (TargetRegisterInfo::isPhysicalRegister(CopyDefReg)) return false; // If we do not know how to rewrite this definition, there is no point // in trying to kill this instruction. 
- TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg()); - TargetInstrInfo::RegSubRegPair Src = Def; - if (!findNextSource(Src.Reg, Src.SubReg)) + TargetInstrInfo::RegSubRegPair Def(CopyDefReg, CopyDefSubReg); + if (!findNextSource(Def.Reg, Def.SubReg, RewriteMap)) return false; - RewritePairs.push_back(std::make_pair(Def, Src)); + + RewritePairs.push_back(Def); } + // The change is possible for all defs, do it. - for (const auto &PairDefSrc : RewritePairs) { - const auto &Def = PairDefSrc.first; - const auto &Src = PairDefSrc.second; + for (const auto &Def : RewritePairs) { // Rewrite the "copy" in a way the register coalescer understands. - assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && - "We do not rewrite physical registers"); - const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); - unsigned NewVR = MRI->createVirtualRegister(DefRC); - MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), - NewVR).addReg(Src.Reg, 0, Src.SubReg); - NewCopy->getOperand(0).setSubReg(Def.SubReg); - if (Def.SubReg) - NewCopy->getOperand(0).setIsUndef(); + MachineInstr *NewCopy = CpyRewriter->RewriteSource(Def, RewriteMap); + assert(NewCopy && "Should be able to always generate a new copy"); LocalMIs.insert(NewCopy); - MRI->replaceRegWith(Def.Reg, NewVR); - MRI->clearKillFlags(NewVR); - // We extended the lifetime of Src. - // Clear the kill flags to account for that. - MRI->clearKillFlags(Src.Reg); } + // MI is now dead. MI->eraseFromParent(); ++NumUncoalescableCopies; return true; } -/// isLoadFoldable - Check whether MI is a candidate for folding into a later -/// instruction. We only fold loads to virtual registers and the virtual -/// register defined has a single use. +/// Check whether MI is a candidate for folding into a later instruction. +/// We only fold loads to virtual registers and the virtual register defined +/// has a single use. bool PeepholeOptimizer::isLoadFoldable( - MachineInstr *MI, - SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) { + MachineInstr *MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) { if (!MI->canFoldAsLoad() || !MI->mayLoad()) return false; const MCInstrDesc &MCID = MI->getDesc(); @@ -1010,9 +1313,9 @@ bool PeepholeOptimizer::isLoadFoldable( return false; } -bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, - SmallSet<unsigned, 4> &ImmDefRegs, - DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { +bool PeepholeOptimizer::isMoveImmediate( + MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr *> &ImmDefMIs) { const MCInstrDesc &MCID = MI->getDesc(); if (!MI->isMoveImmediate()) return false; @@ -1028,23 +1331,26 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, return false; } -/// foldImmediate - Try folding register operands that are defined by move -/// immediate instructions, i.e. a trivial constant folding optimization, if +/// Try folding register operands that are defined by move immediate +/// instructions, i.e. a trivial constant folding optimization, if /// and only if the def and use are in the same BB. 
-bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, - SmallSet<unsigned, 4> &ImmDefRegs, - DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { +bool PeepholeOptimizer::foldImmediate( + MachineInstr *MI, MachineBasicBlock *MBB, SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr *> &ImmDefMIs) { for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isDef()) continue; + // Ignore dead implicit defs. + if (MO.isImplicit() && MO.isDead()) + continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (ImmDefRegs.count(Reg) == 0) continue; DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg); - assert(II != ImmDefMIs.end()); + assert(II != ImmDefMIs.end() && "couldn't find immediate definition"); if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { ++NumImmFold; return true; @@ -1053,6 +1359,117 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, return false; } +// FIXME: This is very simple and misses some cases which should be handled when +// motivating examples are found. +// +// The copy rewriting logic should look at uses as well as defs and be able to +// eliminate copies across blocks. +// +// Later copies that are subregister extracts will also not be eliminated since +// only the first copy is considered. +// +// e.g. +// %vreg1 = COPY %vreg0 +// %vreg2 = COPY %vreg0:sub1 +// +// Should replace %vreg2 uses with %vreg1:sub1 +bool PeepholeOptimizer::foldRedundantCopy( + MachineInstr *MI, SmallSet<unsigned, 4> &CopySrcRegs, + DenseMap<unsigned, MachineInstr *> &CopyMIs) { + assert(MI->isCopy() && "expected a COPY machine instruction"); + + unsigned SrcReg = MI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return false; + + unsigned DstReg = MI->getOperand(0).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + return false; + + if (CopySrcRegs.insert(SrcReg).second) { + // First copy of this reg seen. + CopyMIs.insert(std::make_pair(SrcReg, MI)); + return false; + } + + MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second; + + unsigned SrcSubReg = MI->getOperand(1).getSubReg(); + unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg(); + + // Can't replace different subregister extracts. + if (SrcSubReg != PrevSrcSubReg) + return false; + + unsigned PrevDstReg = PrevCopy->getOperand(0).getReg(); + + // Only replace if the copy register class is the same. + // + // TODO: If we have multiple copies to different register classes, we may want + // to track multiple copies of the same source register. + if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg)) + return false; + + MRI->replaceRegWith(DstReg, PrevDstReg); + + // Lifetime of the previous copy has been extended. 
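A simplified, self-contained sketch of the redundant-copy folding introduced in foldRedundantCopy (made-up types, not the MachineInstr API, and the register-class check of the real pass is omitted): remember the first COPY seen for each virtual source register; a later COPY of the same source with the same subregister can reuse the earlier destination instead of defining a new register.

#include <cstdio>
#include <map>

struct Copy {
  unsigned Dst, Src, SrcSub;
};

struct RedundantCopyFolder {
  std::map<unsigned, Copy> FirstCopyOfSrc; // SrcReg -> first copy seen

  // Returns the register that uses of C.Dst should be rewritten to, or 0 when
  // nothing can be folded (first copy of this source, or a different subreg).
  unsigned fold(const Copy &C) {
    auto It = FirstCopyOfSrc.find(C.Src);
    if (It == FirstCopyOfSrc.end()) {
      FirstCopyOfSrc.emplace(C.Src, C);
      return 0;
    }
    if (It->second.SrcSub != C.SrcSub)
      return 0; // different subregister extracts cannot be merged
    return It->second.Dst;
  }
};

int main() {
  RedundantCopyFolder F;
  std::printf("%u\n", F.fold({10, 5, 0})); // 0: first copy of %5
  std::printf("%u\n", F.fold({11, 5, 0})); // 10: reuse %10 for uses of %11
  std::printf("%u\n", F.fold({12, 5, 1})); // 0: different subreg, keep it
}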
+ MRI->clearKillFlags(PrevDstReg); + return true; +} + +bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) { + return TargetRegisterInfo::isPhysicalRegister(Reg) && + !MRI->isAllocatable(Reg); +} + +bool PeepholeOptimizer::foldRedundantNAPhysCopy( + MachineInstr *MI, DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs) { + assert(MI->isCopy() && "expected a COPY machine instruction"); + + if (DisableNAPhysCopyOpt) + return false; + + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) { + // %vreg = COPY %PHYSREG + // Avoid using a datastructure which can track multiple live non-allocatable + // phys->virt copies since LLVM doesn't seem to do this. + NAPhysToVirtMIs.insert({SrcReg, MI}); + return false; + } + + if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg))) + return false; + + // %PHYSREG = COPY %vreg + auto PrevCopy = NAPhysToVirtMIs.find(DstReg); + if (PrevCopy == NAPhysToVirtMIs.end()) { + // We can't remove the copy: there was an intervening clobber of the + // non-allocatable physical register after the copy to virtual. + DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << *MI + << '\n'); + return false; + } + + unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg(); + if (PrevDstReg == SrcReg) { + // Remove the virt->phys copy: we saw the virtual register definition, and + // the non-allocatable physical register's state hasn't changed since then. + DEBUG(dbgs() << "NAPhysCopy: erasing " << *MI << '\n'); + ++NumNAPhysCopies; + return true; + } + + // Potential missed optimization opportunity: we saw a different virtual + // register get a copy of the non-allocatable physical register, and we only + // track one such copy. Avoid getting confused by this new non-allocatable + // physical register definition, and remove it from the tracked copies. + DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << *MI << '\n'); + NAPhysToVirtMIs.erase(PrevCopy); + return false; +} + bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (skipOptnoneFunction(*MF.getFunction())) return false; @@ -1070,9 +1487,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = &*I; - + for (MachineBasicBlock &MBB : MF) { bool SeenMoveImm = false; // During this forward scan, at some point it needs to answer the question @@ -1086,8 +1501,19 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { DenseMap<unsigned, MachineInstr*> ImmDefMIs; SmallSet<unsigned, 16> FoldAsLoadDefCandidates; - for (MachineBasicBlock::iterator - MII = I->begin(), MIE = I->end(); MII != MIE; ) { + // Track when a non-allocatable physical register is copied to a virtual + // register so that useless moves can be removed. + // + // %PHYSREG is the map index; MI is the last valid `%vreg = COPY %PHYSREG` + // without any intervening re-definition of %PHYSREG. + DenseMap<unsigned, MachineInstr *> NAPhysToVirtMIs; + + // Set of virtual registers that are copied from. + SmallSet<unsigned, 4> CopySrcRegs; + DenseMap<unsigned, MachineInstr *> CopySrcMIs; + + for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); + MII != MIE; ) { MachineInstr *MI = &*MII; // We may be erasing MI below, increment MII now. 
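A rough standalone sketch of the non-allocatable physreg round-trip elimination performed by foldRedundantNAPhysCopy (hypothetical types, single-copy tracking as in the real pass): after "%v = COPY %PHYS", a later "%PHYS = COPY %v" restores a value that never changed and can be erased, provided no clobber of %PHYS was seen in between.

#include <cstdio>
#include <map>

struct NAPhysCopyTracker {
  std::map<unsigned, unsigned> PhysToVirt; // PHYS -> virtual reg holding it

  void recordPhysToVirt(unsigned Phys, unsigned Virt) { PhysToVirt[Phys] = Virt; }
  void clobber(unsigned Phys) { PhysToVirt.erase(Phys); }

  // Returns true if "%Phys = COPY %Virt" is redundant and can be erased.
  bool isRedundantVirtToPhys(unsigned Phys, unsigned Virt) {
    auto It = PhysToVirt.find(Phys);
    if (It == PhysToVirt.end())
      return false;       // intervening clobber, or value never copied out
    if (It->second == Virt)
      return true;        // same value flows back: the copy is a no-op
    PhysToVirt.erase(It); // a different vreg was involved: stop tracking
    return false;
  }
};

int main() {
  NAPhysCopyTracker T;
  T.recordPhysToVirt(/*Phys=*/100, /*Virt=*/5); // %5 = COPY %PHYS100
  std::printf("%d\n", T.isRedundantVirtToPhys(100, 5)); // 1
  T.clobber(100);
  std::printf("%d\n", T.isRedundantVirtToPhys(100, 5)); // 0
}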
++MII; @@ -1097,20 +1523,60 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (MI->isDebugValue()) continue; - // If there exists an instruction which belongs to the following - // categories, we will discard the load candidates. - if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || - MI->isKill() || MI->isInlineAsm() || - MI->hasUnmodeledSideEffects()) { + // If we run into an instruction we can't fold across, discard + // the load candidates. + if (MI->isLoadFoldBarrier()) FoldAsLoadDefCandidates.clear(); + + if (MI->isPosition() || MI->isPHI()) + continue; + + if (!MI->isCopy()) { + for (const auto &Op : MI->operands()) { + // Visit all operands: definitions can be implicit or explicit. + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + if (Op.isDef() && isNAPhysCopy(Reg)) { + const auto &Def = NAPhysToVirtMIs.find(Reg); + if (Def != NAPhysToVirtMIs.end()) { + // A new definition of the non-allocatable physical register + // invalidates previous copies. + DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI + << '\n'); + NAPhysToVirtMIs.erase(Def); + } + } + } else if (Op.isRegMask()) { + const uint32_t *RegMask = Op.getRegMask(); + for (auto &RegMI : NAPhysToVirtMIs) { + unsigned Def = RegMI.first; + if (MachineOperand::clobbersPhysReg(RegMask, Def)) { + DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI + << '\n'); + NAPhysToVirtMIs.erase(Def); + } + } + } + } + } + + if (MI->isImplicitDef() || MI->isKill()) + continue; + + if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) { + // Blow away all non-allocatable physical registers knowledge since we + // don't know what's correct anymore. + // + // FIXME: handle explicit asm clobbers. + DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI + << '\n'); + NAPhysToVirtMIs.clear(); continue; } - if (MI->mayStore() || MI->isCall()) - FoldAsLoadDefCandidates.clear(); if ((isUncoalescableCopy(*MI) && optimizeUncoalescableCopy(MI, LocalMIs)) || - (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || + (MI->isCompare() && optimizeCmpInstr(MI, &MBB)) || (MI->isSelect() && optimizeSelect(MI, LocalMIs))) { // MI is deleted. LocalMIs.erase(MI); @@ -1129,17 +1595,26 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } + if (MI->isCopy() && + (foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs) || + foldRedundantNAPhysCopy(MI, NAPhysToVirtMIs))) { + LocalMIs.erase(MI); + MI->eraseFromParent(); + Changed = true; + continue; + } + if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { SeenMoveImm = true; } else { - Changed |= optimizeExtInstr(MI, MBB, LocalMIs); + Changed |= optimizeExtInstr(MI, &MBB, LocalMIs); // optimizeExtInstr might have created new instructions after MI // and before the already incremented MII. Adjust MII so that the // next iteration sees the new instructions. MII = MI; ++MII; if (SeenMoveImm) - Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); + Changed |= foldImmediate(MI, &MBB, ImmDefRegs, ImmDefMIs); } // Check whether MI is a load candidate for folding into a later @@ -1190,8 +1665,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { return Changed; } -bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromCopy() { assert(Def->isCopy() && "Invalid definition"); // Copy instruction are supposed to be: Def = Src. // If someone breaks this assumption, bad things will happen everywhere. 
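A standalone illustration of the regmask-driven invalidation in the main loop above (helper name and mask layout are an assumption for illustration, based on the convention that a set bit in a call's register mask means "preserved"): every tracked non-allocatable physreg whose bit is clear is clobbered by the call and must be dropped from the copy map.

#include <cstdint>
#include <cstdio>
#include <map>
#include <vector>

static bool clobbersPhysReg(const std::vector<uint32_t> &Mask, unsigned Reg) {
  return !(Mask[Reg / 32] & (1u << (Reg % 32)));
}

int main() {
  std::map<unsigned, const char *> Tracked = {{3, "copy A"}, {40, "copy B"}};
  std::vector<uint32_t> Mask(4, 0);
  Mask[40 / 32] |= 1u << (40 % 32); // reg 40 is preserved, reg 3 is not

  for (auto It = Tracked.begin(); It != Tracked.end();) {
    if (clobbersPhysReg(Mask, It->first))
      It = Tracked.erase(It); // invalidate tracked copies of clobbered regs
    else
      ++It;
  }
  std::printf("%zu copy map entries remain\n", Tracked.size()); // 1
}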
@@ -1199,30 +1673,27 @@ bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg, if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) // If we look for a different subreg, it means we want a subreg of src. - // Bails as we do not support composing subreg yet. - return false; + // Bails as we do not support composing subregs yet. + return ValueTrackerResult(); // Otherwise, we want the whole source. const MachineOperand &Src = Def->getOperand(1); - SrcReg = Src.getReg(); - SrcSubReg = Src.getSubReg(); - return true; + return ValueTrackerResult(Src.getReg(), Src.getSubReg()); } -bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { assert(Def->isBitcast() && "Invalid definition"); // Bail if there are effects that a plain copy will not expose. if (Def->hasUnmodeledSideEffects()) - return false; + return ValueTrackerResult(); // Bitcasts with more than one def are not supported. if (Def->getDesc().getNumDefs() != 1) - return false; + return ValueTrackerResult(); if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) // If we look for a different subreg, it means we want a subreg of the src. - // Bails as we do not support composing subreg yet. - return false; + // Bails as we do not support composing subregs yet. + return ValueTrackerResult(); unsigned SrcIdx = Def->getNumOperands(); for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; @@ -1230,25 +1701,25 @@ bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg, const MachineOperand &MO = Def->getOperand(OpIdx); if (!MO.isReg() || !MO.getReg()) continue; + // Ignore dead implicit defs. + if (MO.isImplicit() && MO.isDead()) + continue; assert(!MO.isDef() && "We should have skipped all the definitions by now"); if (SrcIdx != EndOpIdx) // Multiple sources? - return false; + return ValueTrackerResult(); SrcIdx = OpIdx; } const MachineOperand &Src = Def->getOperand(SrcIdx); - SrcReg = Src.getReg(); - SrcSubReg = Src.getSubReg(); - return true; + return ValueTrackerResult(Src.getReg(), Src.getSubReg()); } -bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() { assert((Def->isRegSequence() || Def->isRegSequenceLike()) && "Invalid definition"); if (Def->getOperand(DefIdx).getSubReg()) - // If we are composing subreg, bails out. + // If we are composing subregs, bail out. // The case we are checking is Def.<subreg> = REG_SEQUENCE. // This should almost never happen as the SSA property is tracked at // the register level (as opposed to the subreg level). @@ -1262,16 +1733,16 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg, // have this case. // If we can ascertain (or force) that this never happens, we could // turn that into an assertion. - return false; + return ValueTrackerResult(); if (!TII) // We could handle the REG_SEQUENCE here, but we do not want to // duplicate the code from the generic TII. - return false; + return ValueTrackerResult(); SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs; if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) - return false; + return ValueTrackerResult(); // We are looking at: // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... 
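A minimal standalone sketch of the "tracker result" value type that replaces the old bool-plus-out-parameters style in these hunks (simplified, not the real ValueTrackerResult class): a default-constructed result stands in for the old "return false", and a valid result carries one or more (register, subregister) sources.

#include <cstdio>
#include <cstddef>
#include <utility>
#include <vector>

struct TrackerResult {
  std::vector<std::pair<unsigned, unsigned>> Sources; // (Reg, SubReg)

  TrackerResult() = default;                            // invalid result
  TrackerResult(unsigned Reg, unsigned SubReg) { addSource(Reg, SubReg); }

  void addSource(unsigned Reg, unsigned SubReg) { Sources.push_back({Reg, SubReg}); }
  bool isValid() const { return !Sources.empty(); }
  size_t getNumSources() const { return Sources.size(); }
};

int main() {
  TrackerResult Invalid;    // "could not look through this definition"
  TrackerResult Copy(7, 0); // single source, e.g. a plain COPY
  std::printf("%d %zu\n", Copy.isValid(), Copy.getNumSources()); // 1 1
  std::printf("%d\n", Invalid.isValid());                        // 0
}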
@@ -1279,41 +1750,38 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg, for (auto &RegSeqInput : RegSeqInputRegs) { if (RegSeqInput.SubIdx == DefSubReg) { if (RegSeqInput.SubReg) - // Bails if we have to compose sub registers. - return false; + // Bail if we have to compose sub registers. + return ValueTrackerResult(); - SrcReg = RegSeqInput.Reg; - SrcSubReg = RegSeqInput.SubReg; - return true; + return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg); } } // If the subreg we are tracking is super-defined by another subreg, // we could follow this value. However, this would require to compose // the subreg and we do not do that for now. - return false; + return ValueTrackerResult(); } -bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() { assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) && "Invalid definition"); if (Def->getOperand(DefIdx).getSubReg()) - // If we are composing subreg, bails out. + // If we are composing subreg, bail out. // Same remark as getNextSourceFromRegSequence. // I.e., this may be turned into an assert. - return false; + return ValueTrackerResult(); if (!TII) // We could handle the REG_SEQUENCE here, but we do not want to // duplicate the code from the generic TII. - return false; + return ValueTrackerResult(); TargetInstrInfo::RegSubRegPair BaseReg; TargetInstrInfo::RegSubRegPairAndIdx InsertedReg; if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) - return false; + return ValueTrackerResult(); // We are looking at: // Def = INSERT_SUBREG v0, v1, sub1 @@ -1323,9 +1791,7 @@ bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, // #1 Check if the inserted register matches the required sub index. if (InsertedReg.SubIdx == DefSubReg) { - SrcReg = InsertedReg.Reg; - SrcSubReg = InsertedReg.SubReg; - return true; + return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg); } // #2 Otherwise, if the sub register we are looking for is not partial // defined by the inserted element, we can look through the main @@ -1333,10 +1799,10 @@ bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, const MachineOperand &MODef = Def->getOperand(DefIdx); // If the result register (Def) and the base register (v0) do not // have the same register class or if we have to compose - // subregisters, bails out. + // subregisters, bail out. if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) || BaseReg.SubReg) - return false; + return ValueTrackerResult(); // Get the TRI and check if the inserted sub-register overlaps with the // sub-register we are tracking. @@ -1344,121 +1810,138 @@ bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, if (!TRI || (TRI->getSubRegIndexLaneMask(DefSubReg) & TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0) - return false; + return ValueTrackerResult(); // At this point, the value is available in v0 via the same subreg // we used for Def. - SrcReg = BaseReg.Reg; - SrcSubReg = DefSubReg; - return true; + return ValueTrackerResult(BaseReg.Reg, DefSubReg); } -bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() { assert((Def->isExtractSubreg() || Def->isExtractSubregLike()) && "Invalid definition"); // We are looking at: // Def = EXTRACT_SUBREG v0, sub0 - // Bails if we have to compose sub registers. 
+ // Bail if we have to compose sub registers. // Indeed, if DefSubReg != 0, we would have to compose it with sub0. if (DefSubReg) - return false; + return ValueTrackerResult(); if (!TII) // We could handle the EXTRACT_SUBREG here, but we do not want to // duplicate the code from the generic TII. - return false; + return ValueTrackerResult(); TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) - return false; + return ValueTrackerResult(); - // Bails if we have to compose sub registers. + // Bail if we have to compose sub registers. // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. if (ExtractSubregInputReg.SubReg) - return false; + return ValueTrackerResult(); // Otherwise, the value is available in the v0.sub0. - SrcReg = ExtractSubregInputReg.Reg; - SrcSubReg = ExtractSubregInputReg.SubIdx; - return true; + return ValueTrackerResult(ExtractSubregInputReg.Reg, + ExtractSubregInputReg.SubIdx); } -bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() { assert(Def->isSubregToReg() && "Invalid definition"); // We are looking at: // Def = SUBREG_TO_REG Imm, v0, sub0 - // Bails if we have to compose sub registers. + // Bail if we have to compose sub registers. // If DefSubReg != sub0, we would have to check that all the bits // we track are included in sub0 and if yes, we would have to // determine the right subreg in v0. if (DefSubReg != Def->getOperand(3).getImm()) - return false; - // Bails if we have to compose sub registers. + return ValueTrackerResult(); + // Bail if we have to compose sub registers. // Likewise, if v0.subreg != 0, we would have to compose it with sub0. if (Def->getOperand(2).getSubReg()) - return false; + return ValueTrackerResult(); - SrcReg = Def->getOperand(2).getReg(); - SrcSubReg = Def->getOperand(3).getImm(); - return true; + return ValueTrackerResult(Def->getOperand(2).getReg(), + Def->getOperand(3).getImm()); +} + +/// \brief Explore each PHI incoming operand and return its sources +ValueTrackerResult ValueTracker::getNextSourceFromPHI() { + assert(Def->isPHI() && "Invalid definition"); + ValueTrackerResult Res; + + // If we look for a different subreg, bail as we do not support composing + // subregs yet. + if (Def->getOperand(0).getSubReg() != DefSubReg) + return ValueTrackerResult(); + + // Return all register sources for PHI instructions. + for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) { + auto &MO = Def->getOperand(i); + assert(MO.isReg() && "Invalid PHI instruction"); + Res.addSource(MO.getReg(), MO.getSubReg()); + } + + return Res; } -bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceImpl() { assert(Def && "This method needs a valid definition"); assert( (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) && Def->getOperand(DefIdx).isDef() && "Invalid DefIdx"); if (Def->isCopy()) - return getNextSourceFromCopy(SrcReg, SrcSubReg); + return getNextSourceFromCopy(); if (Def->isBitcast()) - return getNextSourceFromBitcast(SrcReg, SrcSubReg); + return getNextSourceFromBitcast(); // All the remaining cases involve "complex" instructions. - // Bails if we did not ask for the advanced tracking. + // Bail if we did not ask for the advanced tracking. 
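A standalone sketch of collecting PHI incoming sources as getNextSourceFromPHI does (hypothetical operand layout, kept to the usual convention that a PHI's operands alternate incoming register and predecessor block): every odd operand index from 1 upward contributes one source to the result.

#include <cstdio>
#include <vector>

struct Operand { bool IsReg; unsigned Value; };

std::vector<unsigned> phiSources(const std::vector<Operand> &Ops) {
  std::vector<unsigned> Srcs;
  for (size_t i = 1; i < Ops.size(); i += 2)
    if (Ops[i].IsReg)
      Srcs.push_back(Ops[i].Value);
  return Srcs;
}

int main() {
  // %9 = PHI %3, bb0, %4, bb1 -> operands: [def %9, %3, bb0, %4, bb1]
  std::vector<Operand> Ops = {{true, 9}, {true, 3}, {false, 0},
                              {true, 4}, {false, 1}};
  for (unsigned R : phiSources(Ops))
    std::printf("source %%%u\n", R); // %3 and %4
}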
if (!UseAdvancedTracking) - return false; + return ValueTrackerResult(); if (Def->isRegSequence() || Def->isRegSequenceLike()) - return getNextSourceFromRegSequence(SrcReg, SrcSubReg); + return getNextSourceFromRegSequence(); if (Def->isInsertSubreg() || Def->isInsertSubregLike()) - return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg); + return getNextSourceFromInsertSubreg(); if (Def->isExtractSubreg() || Def->isExtractSubregLike()) - return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg); + return getNextSourceFromExtractSubreg(); if (Def->isSubregToReg()) - return getNextSourceFromSubregToReg(SrcReg, SrcSubReg); - return false; + return getNextSourceFromSubregToReg(); + if (Def->isPHI()) + return getNextSourceFromPHI(); + return ValueTrackerResult(); } -const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSource() { // If we reach a point where we cannot move up in the use-def chain, // there is nothing we can get. if (!Def) - return nullptr; + return ValueTrackerResult(); - const MachineInstr *PrevDef = nullptr; - // Try to find the next source. - if (getNextSourceImpl(SrcReg, SrcSubReg)) { + ValueTrackerResult Res = getNextSourceImpl(); + if (Res.isValid()) { // Update definition, definition index, and subregister for the // next call of getNextSource. // Update the current register. - Reg = SrcReg; - // Update the return value before moving up in the use-def chain. - PrevDef = Def; + bool OneRegSrc = Res.getNumSources() == 1; + if (OneRegSrc) + Reg = Res.getSrcReg(0); + // Update the result before moving up in the use-def chain + // with the instruction containing the last found sources. + Res.setInst(Def); + // If we can still move up in the use-def chain, move to the next - // defintion. - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { + // definition. + if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) { Def = MRI.getVRegDef(Reg); DefIdx = MRI.def_begin(Reg).getOperandNo(); - DefSubReg = SrcSubReg; - return PrevDef; + DefSubReg = Res.getSrcSubReg(0); + return Res; } } // If we end up here, this means we will not be able to find another source - // for the next iteration. - // Make sure any new call to getNextSource bails out early by cutting the - // use-def chain. + // for the next iteration. Make sure any new call to getNextSource bails out + // early by cutting the use-def chain. 
Def = nullptr; - return PrevDef; + return Res; } diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp index 6f76116da1eb..b95dffd05c46 100644 --- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -87,7 +87,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired<AliasAnalysis>(); + AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<TargetPassConfig>(); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); @@ -196,7 +196,7 @@ SchedulePostRATDList::SchedulePostRATDList( const RegisterClassInfo &RCI, TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs) - : ScheduleDAGInstrs(MF, &MLI, /*IsPostRA=*/true), AA(AA), EndIndex(0) { + : ScheduleDAGInstrs(MF, &MLI), AA(AA), EndIndex(0) { const InstrItineraryData *InstrItins = MF.getSubtarget().getInstrItineraryData(); @@ -267,7 +267,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { TII = Fn.getSubtarget().getInstrInfo(); MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); - AliasAnalysis *AA = &getAnalysis<AliasAnalysis>(); + AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); RegClassInfo.runOnMachineFunction(Fn); @@ -302,8 +302,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { CriticalPathRCs); // Loop over all of the basic blocks - for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); - MBB != MBBe; ++MBB) { + for (auto &MBB : Fn) { #ifndef NDEBUG // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod if (DebugDiv > 0) { @@ -311,25 +310,25 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { if (bbcnt++ % DebugDiv != DebugMod) continue; dbgs() << "*** DEBUG scheduling " << Fn.getName() - << ":BB#" << MBB->getNumber() << " ***\n"; + << ":BB#" << MBB.getNumber() << " ***\n"; } #endif // Initialize register live-range state for scheduling in this block. - Scheduler.startBlock(MBB); + Scheduler.startBlock(&MBB); // Schedule each sequence of instructions not interrupted by a label // or anything else that effectively needs to shut down scheduling. - MachineBasicBlock::iterator Current = MBB->end(); - unsigned Count = MBB->size(), CurrentCount = Count; - for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { + MachineBasicBlock::iterator Current = MBB.end(); + unsigned Count = MBB.size(), CurrentCount = Count; + for (MachineBasicBlock::iterator I = Current; I != MBB.begin();) { MachineInstr *MI = std::prev(I); --Count; // Calls are not scheduling boundaries before register allocation, but // post-ra we don't gain anything by scheduling across calls since we // don't need to worry about register pressure. 
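A self-contained sketch of the bottom-up region carving that the post-RA list scheduler loop below performs (hypothetical instruction type, not the MachineBasicBlock iterator API): a call or other scheduling boundary ends the current region, and the boundary instruction itself is excluded from scheduling.

#include <cstdio>
#include <vector>

struct Instr { const char *Name; bool IsBoundary; };

int main() {
  std::vector<Instr> Block = {{"load", false}, {"add", false}, {"call", true},
                              {"mul", false},  {"ret", true}};

  size_t RegionEnd = Block.size();
  for (size_t i = Block.size(); i-- > 0;) {
    if (Block[i].IsBoundary) {
      std::printf("region [%zu, %zu)\n", i + 1, RegionEnd);
      RegionEnd = i; // the boundary instruction belongs to neither region
    }
  }
  std::printf("region [0, %zu)\n", RegionEnd);
}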
- if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) { - Scheduler.enterRegion(MBB, I, Current, CurrentCount - Count); + if (MI->isCall() || TII->isSchedulingBoundary(MI, &MBB, Fn)) { + Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count); Scheduler.setEndIndex(CurrentCount); Scheduler.schedule(); Scheduler.exitRegion(); @@ -343,9 +342,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Count -= MI->getBundleSize(); } assert(Count == 0 && "Instruction count mismatch!"); - assert((MBB->begin() == Current || CurrentCount != 0) && + assert((MBB.begin() == Current || CurrentCount != 0) && "Instruction count mismatch!"); - Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.enterRegion(&MBB, MBB.begin(), Current, CurrentCount); Scheduler.setEndIndex(CurrentCount); Scheduler.schedule(); Scheduler.exitRegion(); @@ -355,7 +354,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Scheduler.finishBlock(); // Update register kills - Scheduler.fixupKills(MBB); + Scheduler.fixupKills(&MBB); } return true; @@ -400,8 +399,12 @@ void SchedulePostRATDList::schedule() { } DEBUG(dbgs() << "********** List Scheduling **********\n"); - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); + DEBUG( + for (const SUnit &SU : SUnits) { + SU.dumpAll(this); + dbgs() << '\n'; + } + ); AvailableQueue.initNodes(SUnits); ListScheduleTopDown(); diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index 5f8194983484..d27ea2f51867 100644 --- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -58,7 +58,7 @@ INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs", void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addPreserved<AliasAnalysis>(); + AU.addPreserved<AAResultsWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -96,7 +96,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { // This is a physreg implicit-def. // Look for the first instruction to use or define an alias. - MachineBasicBlock::instr_iterator UserMI = MI; + MachineBasicBlock::instr_iterator UserMI = MI->getIterator(); MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end(); bool Found = false; for (++UserMI; UserMI != UserE; ++UserMI) { @@ -151,7 +151,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::instr_iterator MBBI = MFI->instr_begin(), MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) if (MBBI->isImplicitDef()) - WorkList.insert(MBBI); + WorkList.insert(&*MBBI); if (WorkList.empty()) continue; diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 6ca69a124297..939c50027b02 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -71,8 +71,9 @@ private: // stack frame indexes. unsigned MinCSFrameIndex, MaxCSFrameIndex; - // Save and Restore blocks of the current function. - MachineBasicBlock *SaveBlock; + // Save and Restore blocks of the current function. Typically there is a + // single save block, unless Windows EH funclets are involved. 
+ SmallVector<MachineBasicBlock *, 1> SaveBlocks; SmallVector<MachineBasicBlock *, 4> RestoreBlocks; // Flag to control whether to use the register scavenger to resolve @@ -91,9 +92,6 @@ private: int &SPAdj); void scavengeFrameVirtualRegs(MachineFunction &Fn); void insertPrologEpilogCode(MachineFunction &Fn); - - // Convenience for recognizing return blocks. - bool isReturnBlock(const MachineBasicBlock *MBB) const; }; } // namespace @@ -128,10 +126,6 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -bool PEI::isReturnBlock(const MachineBasicBlock* MBB) const { - return (MBB && !MBB->empty() && MBB->back().isReturn()); -} - /// Compute the set of return blocks void PEI::calculateSets(MachineFunction &Fn) { const MachineFrameInfo *MFI = Fn.getFrameInfo(); @@ -142,25 +136,25 @@ void PEI::calculateSets(MachineFunction &Fn) { // Use the points found by shrink-wrapping, if any. if (MFI->getSavePoint()) { - SaveBlock = MFI->getSavePoint(); + SaveBlocks.push_back(MFI->getSavePoint()); assert(MFI->getRestorePoint() && "Both restore and save must be set"); MachineBasicBlock *RestoreBlock = MFI->getRestorePoint(); // If RestoreBlock does not have any successor and is not a return block // then the end point is unreachable and we do not need to insert any // epilogue. - if (!RestoreBlock->succ_empty() || isReturnBlock(RestoreBlock)) + if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) RestoreBlocks.push_back(RestoreBlock); return; } // Save refs to entry and return blocks. - SaveBlock = Fn.begin(); - for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); - MBB != E; ++MBB) - if (isReturnBlock(MBB)) - RestoreBlocks.push_back(MBB); - - return; + SaveBlocks.push_back(&Fn.front()); + for (MachineBasicBlock &MBB : Fn) { + if (MBB.isEHFuncletEntry()) + SaveBlocks.push_back(&MBB); + if (MBB.isReturnBlock()) + RestoreBlocks.push_back(&MBB); + } } /// StackObjSet - A set of stack object indexes @@ -195,7 +189,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // place all spills in the entry block, all restores in return blocks. calculateSets(Fn); - // Add the code to save and restore the callee saved registers + // Add the code to save and restore the callee saved registers. if (!F->hasFnAttribute(Attribute::Naked)) insertCSRSpillsAndRestores(Fn); @@ -237,6 +231,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { } delete RS; + SaveBlocks.clear(); RestoreBlocks.clear(); return true; } @@ -407,7 +402,7 @@ static void updateLiveness(MachineFunction &MF) { const MachineBasicBlock *CurBB = WorkList.pop_back_val(); // By construction, the region that is after the save point is // dominated by the Save and post-dominated by the Restore. - if (CurBB == Save) + if (CurBB == Save && Save != Restore) continue; // Enqueue all the successors not already visited. // Those are by construction either before Save or after Restore. @@ -419,10 +414,13 @@ static void updateLiveness(MachineFunction &MF) { const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - for (MachineBasicBlock *MBB : Visited) + for (MachineBasicBlock *MBB : Visited) { + MCPhysReg Reg = CSI[i].getReg(); // Add the callee-saved register as live-in. // It's killed at the spill. 
- MBB->addLiveIn(CSI[i].getReg()); + if (!MBB->isLiveIn(Reg)) + MBB->addLiveIn(Reg); + } } } @@ -446,18 +444,20 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { MachineBasicBlock::iterator I; // Spill using target interface. - I = SaveBlock->begin(); - if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) { - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - // Insert the spill to the stack frame. - unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(), - RC, TRI); + for (MachineBasicBlock *SaveBlock : SaveBlocks) { + I = SaveBlock->begin(); + if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Insert the spill to the stack frame. + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(), + RC, TRI); + } } + // Update the live-in information of all the blocks up to the save point. + updateLiveness(Fn); } - // Update the live-in information of all the blocks up to the save point. - updateLiveness(Fn); // Restore using target interface. for (MachineBasicBlock *MBB : RestoreBlocks) { @@ -500,7 +500,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { static inline void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, bool StackGrowsDown, int64_t &Offset, - unsigned &MaxAlign) { + unsigned &MaxAlign, unsigned Skew) { // If the stack grows down, add the object size to find the lowest address. if (StackGrowsDown) Offset += MFI->getObjectSize(FrameIdx); @@ -512,7 +512,7 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, MaxAlign = std::max(MaxAlign, Align); // Adjust to alignment boundary. - Offset = (Offset + Align - 1) / Align * Align; + Offset = RoundUpToAlignment(Offset, Align, Skew); if (StackGrowsDown) { DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n"); @@ -530,12 +530,12 @@ static void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs, MachineFrameInfo *MFI, bool StackGrowsDown, - int64_t &Offset, unsigned &MaxAlign) { + int64_t &Offset, unsigned &MaxAlign, unsigned Skew) { for (StackObjSet::const_iterator I = UnassignedObjs.begin(), E = UnassignedObjs.end(); I != E; ++I) { int i = *I; - AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew); ProtectedObjs.insert(i); } } @@ -563,6 +563,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { && "Local area offset should be in direction of stack growth"); int64_t Offset = LocalAreaOffset; + // Skew to be applied to alignment. + unsigned Skew = TFI.getStackAlignmentSkew(Fn); + // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. 
// We currently don't support filling in holes in between fixed sized @@ -593,7 +596,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary - Offset = RoundUpToAlignment(Offset, Align); + Offset = RoundUpToAlignment(Offset, Align, Skew); MFI->setObjectOffset(i, -Offset); // Set the computed offset } @@ -602,7 +605,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { for (int i = MaxCSFI; i >= MinCSFI ; --i) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary - Offset = RoundUpToAlignment(Offset, Align); + Offset = RoundUpToAlignment(Offset, Align, Skew); MFI->setObjectOffset(i, Offset); Offset += MFI->getObjectSize(i); @@ -624,7 +627,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { RS->getScavengingFrameIndices(SFIs); for (SmallVectorImpl<int>::iterator I = SFIs.begin(), IE = SFIs.end(); I != IE; ++I) - AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew); } // FIXME: Once this is working, then enable flag will change to a target @@ -635,7 +638,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned Align = MFI->getLocalFrameMaxAlign(); // Adjust to alignment boundary. - Offset = RoundUpToAlignment(Offset, Align); + Offset = RoundUpToAlignment(Offset, Align, Skew); DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); @@ -662,7 +665,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { StackObjSet AddrOfObjs; AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, - Offset, MaxAlign); + Offset, MaxAlign, Skew); // Assign large stack objects first. for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { @@ -695,11 +698,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, - Offset, MaxAlign); + Offset, MaxAlign, Skew); AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown, - Offset, MaxAlign); + Offset, MaxAlign, Skew); AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown, - Offset, MaxAlign); + Offset, MaxAlign, Skew); } // Then assign frame offsets to stack objects that are not used to spill @@ -719,7 +722,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { if (ProtectedObjs.count(i)) continue; - AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew); } // Make sure the special register scavenging spill slot is closest to the @@ -729,7 +732,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { RS->getScavengingFrameIndices(SFIs); for (SmallVectorImpl<int>::iterator I = SFIs.begin(), IE = SFIs.end(); I != IE; ++I) - AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew); } if (!TFI.targetHandlesStackFrameRounding()) { @@ -754,7 +757,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // If the frame pointer is eliminated, all frame offsets will be relative to // SP not FP. Align to MaxAlign so this works. StackAlign = std::max(StackAlign, MaxAlign); - Offset = RoundUpToAlignment(Offset, StackAlign); + Offset = RoundUpToAlignment(Offset, StackAlign, Skew); } // Update frame info to pretend that this is part of the stack... 
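A standalone sketch of rounding an offset up to an alignment boundary with a skew, in the spirit of the new Skew parameter threaded through the hunks above (the exact formula is an assumption for illustration): the result is the smallest value not below the input that is congruent to the skew modulo the alignment.

#include <cstdint>
#include <cstdio>

uint64_t roundUpWithSkew(uint64_t Value, uint64_t Align, uint64_t Skew) {
  Skew %= Align;
  return (Value + Align - 1 - Skew) / Align * Align + Skew;
}

int main() {
  // Plain 16-byte alignment versus the same alignment skewed by 8 bytes.
  std::printf("%llu\n", (unsigned long long)roundUpWithSkew(20, 16, 0)); // 32
  std::printf("%llu\n", (unsigned long long)roundUpWithSkew(20, 16, 8)); // 24
}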
@@ -771,18 +774,24 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); // Add prologue to the function... - TFI.emitPrologue(Fn, *SaveBlock); + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.emitPrologue(Fn, *SaveBlock); // Add epilogue to restore the callee-save registers in each exiting block. for (MachineBasicBlock *RestoreBlock : RestoreBlocks) TFI.emitEpilogue(Fn, *RestoreBlock); + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.inlineStackProbe(Fn, *SaveBlock); + // Emit additional code that is required to support segmented stacks, if // we've been asked for it. This, when linked with a runtime with support // for segmented stacks (libgcc is one), will result in allocating stack // space in small chunks instead of one large contiguous block. - if (Fn.shouldSplitStack()) - TFI.adjustForSegmentedStacks(Fn, *SaveBlock); + if (Fn.shouldSplitStack()) { + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.adjustForSegmentedStacks(Fn, *SaveBlock); + } // Emit additional code that is required to explicitly handle the stack in // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The @@ -790,7 +799,8 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // different conditional check and another BIF for allocating more stack // space. if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE) - TFI.adjustForHiPEPrologue(Fn, *SaveBlock); + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.adjustForHiPEPrologue(Fn, *SaveBlock); } /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical @@ -800,25 +810,6 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); if (!TFI.needsFrameIndexResolution(Fn)) return; - MachineModuleInfo &MMI = Fn.getMMI(); - const Function *F = Fn.getFunction(); - const Function *ParentF = MMI.getWinEHParent(F); - unsigned FrameReg; - if (F == ParentF) { - WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction()); - // FIXME: This should be unconditional but we have bugs in the preparation - // pass. - if (FuncInfo.UnwindHelpFrameIdx != INT_MAX) - FuncInfo.UnwindHelpFrameOffset = TFI.getFrameIndexReferenceFromSP( - Fn, FuncInfo.UnwindHelpFrameIdx, FrameReg); - } else if (MMI.hasWinEHFuncInfo(F)) { - WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction()); - auto I = FuncInfo.CatchHandlerParentFrameObjIdx.find(F); - if (I != FuncInfo.CatchHandlerParentFrameObjIdx.end()) - FuncInfo.CatchHandlerParentFrameObjOffset[F] = - TFI.getFrameIndexReferenceFromSP(Fn, I->second, FrameReg); - } - // Store SPAdj at exit of a basic block. SmallVector<int, 8> SPState; SPState.resize(Fn.getNumBlockIDs()); @@ -841,12 +832,12 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { } // Handle the unreachable blocks. - for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - if (Reachable.count(BB)) + for (auto &BB : Fn) { + if (Reachable.count(&BB)) // Already handled in DFS traversal. 
continue; int SPAdj = 0; - replaceFrameIndices(BB, Fn, SPAdj); + replaceFrameIndices(&BB, Fn, SPAdj); } } @@ -889,11 +880,11 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, if (!MI->getOperand(i).isFI()) continue; - // Frame indicies in debug values are encoded in a target independent + // Frame indices in debug values are encoded in a target independent // way with simply the frame index and offset rather than any // target-specific addressing mode. if (MI->isDebugValue()) { - assert(i == 0 && "Frame indicies can only appear as the first " + assert(i == 0 && "Frame indices can only appear as the first " "operand of a DBG_VALUE machine instruction"); unsigned Reg; MachineOperand &Offset = MI->getOperand(1); @@ -979,7 +970,7 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Run through the instructions and find any virtual registers. for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - RS->enterBasicBlock(BB); + RS->enterBasicBlock(&*BB); int SPAdj = 0; @@ -1026,12 +1017,8 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Replace this reference to the virtual register with the // scratch register. assert (ScratchReg && "Missing scratch register!"); - MachineRegisterInfo &MRI = Fn.getRegInfo(); Fn.getRegInfo().replaceRegWith(Reg, ScratchReg); - // Make sure MRI now accounts this register as used. - MRI.setPhysRegUsed(ScratchReg); - // Because this instruction was processed by the RS before this // register was allocated, make sure that the RS now records the // register as being used. @@ -1044,7 +1031,7 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // problem because we need the spill code before I: Move I to just // prior to J. if (I != std::prev(J)) { - BB->splice(J, BB, I); + BB->splice(J, &*BB, I); // Before we move I, we need to prepare the RS to visit I again. // Specifically, RS will assert if it sees uses of registers that diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp index b1c341d3a681..1f46417e61e7 100644 --- a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp +++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/DerivedTypes.h" @@ -22,87 +23,38 @@ #include <map> using namespace llvm; -namespace { -struct PSVGlobalsTy { - // PseudoSourceValues are immutable so don't need locking. - const PseudoSourceValue PSVs[4]; - sys::Mutex Lock; // Guards FSValues, but not the values inside it. 
- std::map<int, const PseudoSourceValue *> FSValues; - - PSVGlobalsTy() : PSVs() {} - ~PSVGlobalsTy() { - for (std::map<int, const PseudoSourceValue *>::iterator - I = FSValues.begin(), E = FSValues.end(); I != E; ++I) { - delete I->second; - } - } -}; - -static ManagedStatic<PSVGlobalsTy> PSVGlobals; - -} // anonymous namespace - -const PseudoSourceValue *PseudoSourceValue::getStack() -{ return &PSVGlobals->PSVs[0]; } -const PseudoSourceValue *PseudoSourceValue::getGOT() -{ return &PSVGlobals->PSVs[1]; } -const PseudoSourceValue *PseudoSourceValue::getJumpTable() -{ return &PSVGlobals->PSVs[2]; } -const PseudoSourceValue *PseudoSourceValue::getConstantPool() -{ return &PSVGlobals->PSVs[3]; } - static const char *const PSVNames[] = { - "Stack", - "GOT", - "JumpTable", - "ConstantPool" -}; + "Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack", + "GlobalValueCallEntry", "ExternalSymbolCallEntry"}; -PseudoSourceValue::PseudoSourceValue(bool isFixed) : isFixed(isFixed) {} +PseudoSourceValue::PseudoSourceValue(PSVKind Kind) : Kind(Kind) {} PseudoSourceValue::~PseudoSourceValue() {} void PseudoSourceValue::printCustom(raw_ostream &O) const { - O << PSVNames[this - PSVGlobals->PSVs]; -} - -const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) { - PSVGlobalsTy &PG = *PSVGlobals; - sys::ScopedLock locked(PG.Lock); - const PseudoSourceValue *&V = PG.FSValues[FI]; - if (!V) - V = new FixedStackPseudoSourceValue(FI); - return V; + O << PSVNames[Kind]; } bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const { - if (this == getStack()) + if (isStack()) return false; - if (this == getGOT() || - this == getConstantPool() || - this == getJumpTable()) + if (isGOT() || isConstantPool() || isJumpTable()) return true; llvm_unreachable("Unknown PseudoSourceValue!"); } -bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const { - if (this == getStack() || - this == getGOT() || - this == getConstantPool() || - this == getJumpTable()) +bool PseudoSourceValue::isAliased(const MachineFrameInfo *) const { + if (isStack() || isGOT() || isConstantPool() || isJumpTable()) return false; llvm_unreachable("Unknown PseudoSourceValue!"); } -bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const { - if (this == getGOT() || - this == getConstantPool() || - this == getJumpTable()) - return false; - return true; +bool PseudoSourceValue::mayAlias(const MachineFrameInfo *) const { + return !(isGOT() || isConstantPool() || isJumpTable()); } -bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{ +bool FixedStackPseudoSourceValue::isConstant( + const MachineFrameInfo *MFI) const { return MFI && MFI->isImmutableObjectIndex(FI); } @@ -122,3 +74,69 @@ bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const { void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const { OS << "FixedStack" << FI; } + +CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(PSVKind Kind) + : PseudoSourceValue(Kind) {} + +bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const { + return false; +} + +bool CallEntryPseudoSourceValue::isAliased(const MachineFrameInfo *) const { + return false; +} + +bool CallEntryPseudoSourceValue::mayAlias(const MachineFrameInfo *) const { + return false; +} + +GlobalValuePseudoSourceValue::GlobalValuePseudoSourceValue( + const GlobalValue *GV) + : CallEntryPseudoSourceValue(GlobalValueCallEntry), GV(GV) {} + +ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue(const char *ES) + : 
CallEntryPseudoSourceValue(ExternalSymbolCallEntry), ES(ES) {} + +PseudoSourceValueManager::PseudoSourceValueManager() + : StackPSV(PseudoSourceValue::Stack), GOTPSV(PseudoSourceValue::GOT), + JumpTablePSV(PseudoSourceValue::JumpTable), + ConstantPoolPSV(PseudoSourceValue::ConstantPool) {} + +const PseudoSourceValue *PseudoSourceValueManager::getStack() { + return &StackPSV; +} + +const PseudoSourceValue *PseudoSourceValueManager::getGOT() { return &GOTPSV; } + +const PseudoSourceValue *PseudoSourceValueManager::getConstantPool() { + return &ConstantPoolPSV; +} + +const PseudoSourceValue *PseudoSourceValueManager::getJumpTable() { + return &JumpTablePSV; +} + +const PseudoSourceValue *PseudoSourceValueManager::getFixedStack(int FI) { + std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI]; + if (!V) + V = llvm::make_unique<FixedStackPseudoSourceValue>(FI); + return V.get(); +} + +const PseudoSourceValue * +PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) { + std::unique_ptr<const GlobalValuePseudoSourceValue> &E = + GlobalCallEntries[GV]; + if (!E) + E = llvm::make_unique<GlobalValuePseudoSourceValue>(GV); + return E.get(); +} + +const PseudoSourceValue * +PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) { + std::unique_ptr<const ExternalSymbolPseudoSourceValue> &E = + ExternalCallEntries[ES]; + if (!E) + E = llvm::make_unique<ExternalSymbolPseudoSourceValue>(ES); + return E.get(); +} diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp index 0090332a8123..cfe367d5115c 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -133,8 +133,8 @@ RABasic::RABasic(): MachineFunctionPass(ID) { void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired<AliasAnalysis>(); - AU.addPreserved<AliasAnalysis>(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); AU.addRequired<LiveIntervals>(); AU.addPreserved<LiveIntervals>(); AU.addPreserved<SlotIndexes>(); @@ -223,7 +223,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, SmallVector<unsigned, 8> PhysRegSpillCands; // Check for an available register in this class. - AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); while (unsigned PhysReg = Order.next()) { // Check for interference in PhysReg switch (Matrix->checkInterference(VirtReg, PhysReg)) { @@ -276,7 +276,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { getAnalysis<LiveIntervals>(), getAnalysis<LiveRegMatrix>()); - calculateSpillWeightsAndHints(*LIS, *MF, + calculateSpillWeightsAndHints(*LIS, *MF, VRM, getAnalysis<MachineLoopInfo>(), getAnalysis<MachineBlockFrequencyInfo>()); diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp index fd3d4d78968b..f4c076fea0e7 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -799,10 +799,9 @@ void RAFast::AllocateBasicBlock() { MachineBasicBlock::iterator MII = MBB->begin(); // Add live-in registers as live. 
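A reduced sketch of the per-function value-manager pattern that replaces the old ManagedStatic globals in the PseudoSourceValue hunks above (standalone types, not the PseudoSourceValueManager API): singleton-style values become plain members, and per-key values, here keyed by frame index, are created lazily and owned by a map of unique_ptrs so repeated lookups return the same object without locking.

#include <cstdio>
#include <map>
#include <memory>

struct FixedStackValue {
  int FI;
  explicit FixedStackValue(int FI) : FI(FI) {}
};

struct ValueManager {
  std::map<int, std::unique_ptr<FixedStackValue>> FSValues;

  const FixedStackValue *getFixedStack(int FI) {
    std::unique_ptr<FixedStackValue> &V = FSValues[FI];
    if (!V)
      V.reset(new FixedStackValue(FI)); // created once, then cached
    return V.get();
  }
};

int main() {
  ValueManager M;
  const FixedStackValue *A = M.getFixedStack(3);
  const FixedStackValue *B = M.getFixedStack(3);
  std::printf("%d\n", A == B); // 1: same cached object on both lookups
}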
- for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), - E = MBB->livein_end(); I != E; ++I) - if (MRI->isAllocatable(*I)) - definePhysReg(MII, *I, regReserved); + for (const auto &LI : MBB->liveins()) + if (MRI->isAllocatable(LI.PhysReg)) + definePhysReg(MII, LI.PhysReg, regReserved); SmallVector<unsigned, 8> VirtDead; SmallVector<MachineInstr*, 32> Coalesced; @@ -986,10 +985,6 @@ void RAFast::AllocateBasicBlock() { } } - for (UsedInInstrSet::iterator - I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) - MRI->setRegUnitUsed(*I); - // Track registers defined by instruction - early clobbers and tied uses at // this point. UsedInInstr.clear(); @@ -1050,10 +1045,6 @@ void RAFast::AllocateBasicBlock() { killVirtReg(VirtDead[i]); VirtDead.clear(); - for (UsedInInstrSet::iterator - I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) - MRI->setRegUnitUsed(*I); - if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { DEBUG(dbgs() << "-- coalescing: " << *MI); Coalesced.push_back(MI); @@ -1103,12 +1094,6 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { AllocateBasicBlock(); } - // Add the clobber lists for all the instructions we skipped earlier. - for (const MCInstrDesc *Desc : SkippedInstrs) - if (const uint16_t *Defs = Desc->getImplicitDefs()) - while (*Defs) - MRI->setPhysRegUsed(*Defs++); - // All machine operands and other references to virtual registers have been // replaced. Remove the virtual registers. MRI->clearVirtRegs(); diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp index 7ebcf7f54856..945cb9e2c993 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -86,6 +86,14 @@ static cl::opt<bool> EnableLocalReassignment( "may be compile time intensive"), cl::init(false)); +static cl::opt<bool> EnableDeferredSpilling( + "enable-deferred-spilling", cl::Hidden, + cl::desc("Instead of spilling a variable right away, defer the actual " + "code insertion to the end of the allocation. That way the " + "allocator might still find a suitable coloring for this " + "variable because of other evicted variables."), + cl::init(false)); + // FIXME: Find a good default for this flag and remove the flag. static cl::opt<unsigned> CSRFirstTimeCost("regalloc-csr-first-time-cost", @@ -157,6 +165,11 @@ class RAGreedy : public MachineFunctionPass, /// Live range will be spilled. No more splitting will be attempted. RS_Spill, + + /// Live range is in memory. Because of other evictions, it might get moved + /// in a register in the end. + RS_Memory, + /// There is nothing more we can do to this live range. Abort compilation /// if it can't be assigned. RS_Done @@ -414,6 +427,7 @@ const char *const RAGreedy::StageName[] = { "RS_Split", "RS_Split2", "RS_Spill", + "RS_Memory", "RS_Done" }; #endif @@ -447,8 +461,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<MachineBlockFrequencyInfo>(); AU.addPreserved<MachineBlockFrequencyInfo>(); - AU.addRequired<AliasAnalysis>(); - AU.addPreserved<AliasAnalysis>(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); AU.addRequired<LiveIntervals>(); AU.addPreserved<LiveIntervals>(); AU.addRequired<SlotIndexes>(); @@ -536,6 +550,13 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Unsplit ranges that couldn't be allocated immediately are deferred until // everything else has been allocated. 
Prio = Size; + } else if (ExtraRegInfo[Reg].Stage == RS_Memory) { + // Memory operand should be considered last. + // Change the priority such that Memory operand are assigned in + // the reverse order that they came in. + // TODO: Make this a member variable and probably do something about hints. + static unsigned MemOp = 0; + Prio = MemOp++; } else { // Giant live ranges fall back to the global assignment heuristic, which // prevents excessive spilling in pathological cases. @@ -637,7 +658,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, //===----------------------------------------------------------------------===// unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) { - AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); unsigned PhysReg; while ((PhysReg = Order.next())) { if (PhysReg == PrevReg) @@ -2450,7 +2471,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, unsigned Depth) { unsigned CostPerUseLimit = ~0u; // First try assigning a free register. - AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical @@ -2512,13 +2533,23 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, return PhysReg; // Finally spill VirtReg itself. - NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); - spiller().spill(LRE); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); + if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) { + // TODO: This is experimental and in particular, we do not model + // the live range splitting done by spilling correctly. + // We would need a deep integration with the spiller to do the + // right thing here. Anyway, that is still good for early testing. + setStage(VirtReg, RS_Memory); + DEBUG(dbgs() << "Do as if this register is in memory\n"); + NewVRegs.push_back(VirtReg.reg); + } else { + NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); + LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + spiller().spill(LRE); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); - if (VerifyEnabled) - MF->verify(this, "After spilling"); + if (VerifyEnabled) + MF->verify(this, "After spilling"); + } // The live virtual register requesting allocation was spilled, so tell // the caller not to allocate anything during this round. 
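A toy model of the deferred-spilling queue behaviour described in the enqueue hunk above (standalone, made-up registers): ranges in the memory stage receive a priority taken from an increasing counter, so with a max-priority queue they come back out in the reverse of the order in which they were deferred.

#include <cstdio>
#include <queue>
#include <utility>

int main() {
  std::priority_queue<std::pair<unsigned, unsigned>> Q; // (priority, vreg)
  unsigned MemOp = 0;
  for (unsigned VReg : {5u, 6u, 7u}) // deferred in this order
    Q.push({MemOp++, VReg});
  while (!Q.empty()) {
    std::printf("retry %%%u\n", Q.top().second); // %7, %6, %5
    Q.pop();
  }
}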
@@ -2555,7 +2586,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { initializeCSRCost(); - calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI); + calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI); DEBUG(LIS->dump()); diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp index eeff73d0f2a0..fd28b05ed80a 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -47,6 +47,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -497,8 +498,8 @@ void PBQPRAConstraintList::anchor() {} void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.setPreservesCFG(); - au.addRequired<AliasAnalysis>(); - au.addPreserved<AliasAnalysis>(); + au.addRequired<AAResultsWrapperPass>(); + au.addPreserved<AAResultsWrapperPass>(); au.addRequired<SlotIndexes>(); au.addPreserved<SlotIndexes>(); au.addRequired<LiveIntervals>(); @@ -724,11 +725,11 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { MachineBlockFrequencyInfo &MBFI = getAnalysis<MachineBlockFrequencyInfo>(); - calculateSpillWeightsAndHints(LIS, MF, getAnalysis<MachineLoopInfo>(), MBFI, - normalizePBQPSpillWeight); - VirtRegMap &VRM = getAnalysis<VirtRegMap>(); + calculateSpillWeightsAndHints(LIS, MF, &VRM, getAnalysis<MachineLoopInfo>(), + MBFI, normalizePBQPSpillWeight); + std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM)); MF.getRegInfo().freezeReservedRegs(MF); @@ -805,33 +806,17 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { return true; } -namespace { -// A helper class for printing node and register info in a consistent way -class PrintNodeInfo { -public: - typedef PBQP::RegAlloc::PBQPRAGraph Graph; - typedef PBQP::RegAlloc::PBQPRAGraph::NodeId NodeId; - - PrintNodeInfo(NodeId NId, const Graph &G) : G(G), NId(NId) {} - - void print(raw_ostream &OS) const { +/// Create Printable object for node and register info. 
+static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId, + const PBQP::RegAlloc::PBQPRAGraph &G) { + return Printable([NId, &G](raw_ostream &OS) { const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); unsigned VReg = G.getNodeMetadata(NId).getVReg(); const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg)); OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')'; - } - -private: - const Graph &G; - NodeId NId; -}; - -inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeInfo &PR) { - PR.print(OS); - return OS; + }); } -} // anonymous namespace void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const { for (auto NId : nodeIds()) { diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp index c911b9b47ea2..e7b32179bde5 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -32,7 +32,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -93,7 +92,7 @@ namespace { /// A LaneMask to remember on which subregister live ranges we need to call /// shrinkToUses() later. - unsigned ShrinkMask; + LaneBitmask ShrinkMask; /// True if the main range of the currently coalesced intervals should be /// checked for smaller live intervals. @@ -164,15 +163,13 @@ namespace { /// LaneMask are split as necessary. @p LaneMask are the lanes that /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange /// lanemasks already adjusted to the coalesced register. - /// @returns false if live range conflicts couldn't get resolved. - bool mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, - unsigned LaneMask, CoalescerPair &CP); + void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, + LaneBitmask LaneMask, CoalescerPair &CP); /// Join the liveranges of two subregisters. Joins @p RRange into /// @p LRange, @p RRange may be invalid afterwards. - /// @returns false if live range conflicts couldn't get resolved. - bool joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, - unsigned LaneMask, const CoalescerPair &CP); + void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, + LaneBitmask LaneMask, const CoalescerPair &CP); /// We found a non-trivially-coalescable copy. If the source value number is /// defined by a copy from the destination reg see if we can merge these two @@ -224,30 +221,17 @@ namespace { /// Dst, we can drop \p Copy. bool applyTerminalRule(const MachineInstr &Copy) const; - /// Check whether or not \p LI is composed by multiple connected - /// components and if that is the case, fix that. - void splitNewRanges(LiveInterval *LI) { - ConnectedVNInfoEqClasses ConEQ(*LIS); - unsigned NumComps = ConEQ.Classify(LI); - if (NumComps <= 1) - return; - SmallVector<LiveInterval*, 8> NewComps(1, LI); - for (unsigned i = 1; i != NumComps; ++i) { - unsigned VReg = MRI->createVirtualRegister(MRI->getRegClass(LI->reg)); - NewComps.push_back(&LIS->createEmptyInterval(VReg)); - } - - ConEQ.Distribute(&NewComps[0], *MRI); - } - /// Wrapper method for \see LiveIntervals::shrinkToUses. /// This method does the proper fixing of the live-ranges when the afore /// mentioned method returns true. 
void shrinkToUses(LiveInterval *LI, SmallVectorImpl<MachineInstr * > *Dead = nullptr) { - if (LIS->shrinkToUses(LI, Dead)) - // We may have created multiple connected components, split them. - splitNewRanges(LI); + if (LIS->shrinkToUses(LI, Dead)) { + /// Check whether or not \p LI is composed by multiple connected + /// components and if that is the case, fix that. + SmallVector<LiveInterval*, 8> SplitLIs; + LIS->splitSeparateComponents(*LI, SplitLIs); + } } public: @@ -275,7 +259,7 @@ INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", false, false) @@ -453,7 +437,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired<AliasAnalysis>(); + AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<LiveIntervals>(); AU.addPreserved<LiveIntervals>(); AU.addPreserved<SlotIndexes>(); @@ -679,14 +663,18 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, unsigned UseOpIdx; if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) return false; - unsigned Op1, Op2, NewDstIdx; - if (!TII->findCommutedOpIndices(DefMI, Op1, Op2)) - return false; - if (Op1 == UseOpIdx) - NewDstIdx = Op2; - else if (Op2 == UseOpIdx) - NewDstIdx = Op1; - else + + // FIXME: The code below tries to commute 'UseOpIdx' operand with some other + // commutable operand which is expressed by 'CommuteAnyOperandIndex'value + // passed to the method. That _other_ operand is chosen by + // the findCommutedOpIndices() method. + // + // That is obviously an area for improvement in case of instructions having + // more than 2 operands. For example, if some instruction has 3 commutable + // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3, + // op#2<->op#3) of commute transformation should be considered/tried here. + unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex; + if (!TII->findCommutedOpIndices(DefMI, UseOpIdx, NewDstIdx)) return false; MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); @@ -719,7 +707,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // At this point we have decided that it is legal to do this // transformation. Start by commuting the instruction. 
MachineBasicBlock *MBB = DefMI->getParent(); - MachineInstr *NewMI = TII->commuteInstruction(DefMI); + MachineInstr *NewMI = + TII->commuteInstruction(DefMI, false, UseOpIdx, NewDstIdx); if (!NewMI) return false; if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && @@ -804,7 +793,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); if (IntB.hasSubRanges()) { if (!IntA.hasSubRanges()) { - unsigned Mask = MRI->getMaxLaneMaskForVReg(IntA.reg); + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg); IntA.createSubRangeFrom(Allocator, Mask, IntA); } SlotIndex AIdx = CopyIdx.getRegSlot(true); @@ -812,20 +801,21 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, VNInfo *ASubValNo = SA.getVNInfoAt(AIdx); assert(ASubValNo != nullptr); - unsigned AMask = SA.LaneMask; + LaneBitmask AMask = SA.LaneMask; for (LiveInterval::SubRange &SB : IntB.subranges()) { - unsigned BMask = SB.LaneMask; - unsigned Common = BMask & AMask; + LaneBitmask BMask = SB.LaneMask; + LaneBitmask Common = BMask & AMask; if (Common == 0) continue; - DEBUG( - dbgs() << format("\t\tCopy+Merge %04X into %04X\n", BMask, Common)); - unsigned BRest = BMask & ~AMask; + DEBUG( dbgs() << "\t\tCopy_Merge " << PrintLaneMask(BMask) + << " into " << PrintLaneMask(Common) << '\n'); + LaneBitmask BRest = BMask & ~AMask; LiveInterval::SubRange *CommonRange; if (BRest != 0) { SB.LaneMask = BRest; - DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", BRest)); + DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(BRest) + << '\n'); // Duplicate SubRange for newly merged common stuff. CommonRange = IntB.createSubRangeFrom(Allocator, Common, SB); } else { @@ -842,7 +832,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, AMask &= ~BMask; } if (AMask != 0) { - DEBUG(dbgs() << format("\t\tNew Lane %04X\n", AMask)); + DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(AMask) << '\n'); LiveRange *NewRange = IntB.createSubRange(Allocator, AMask); VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator); addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo); @@ -1107,7 +1097,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { const LiveInterval &SrcLI = LIS->getInterval(SrcReg); // CopyMI is undef iff SrcReg is not live before the instruction. if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) { - unsigned SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx); + LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx); for (const LiveInterval::SubRange &SR : SrcLI.subranges()) { if ((SR.LaneMask & SrcMask) == 0) continue; @@ -1128,7 +1118,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { DstLI.MergeValueNumberInto(VNI, PrevVNI); // The affected subregister segments can be removed. 
- unsigned DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx); + LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx); for (LiveInterval::SubRange &SR : DstLI.subranges()) { if ((SR.LaneMask & DstMask) == 0) continue; @@ -1147,7 +1137,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { continue; const MachineInstr &MI = *MO.getParent(); SlotIndex UseIdx = LIS->getInstructionIndex(&MI); - unsigned UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); + LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); bool isLive; if (UseMask != ~0u && DstLI.hasSubRanges()) { isLive = false; @@ -1213,10 +1203,10 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) { if (!DstInt->hasSubRanges()) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); - unsigned Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); DstInt->createSubRangeFrom(Allocator, Mask, *DstInt); } - unsigned Mask = TRI->getSubRegIndexLaneMask(SubIdx); + LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubIdx); bool IsUndef = true; SlotIndex MIIdx = UseMI->isDebugValue() ? LIS->getSlotIndexes()->getIndexBefore(UseMI) @@ -1445,8 +1435,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { for (LiveInterval::SubRange &S : LI.subranges()) { if ((S.LaneMask & ShrinkMask) == 0) continue; - DEBUG(dbgs() << "Shrink LaneUses (Lane " - << format("%04X", S.LaneMask) << ")\n"); + DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) + << ")\n"); LIS->shrinkToUses(S, LI.reg); } LI.removeEmptySubRanges(); @@ -1644,7 +1634,7 @@ class JoinVals { const unsigned SubIdx; /// The LaneMask that this liverange will occupy the coalesced register. May /// be smaller than the lanemask produced by SubIdx when merging subranges. - const unsigned LaneMask; + const LaneBitmask LaneMask; /// This is true when joining sub register ranges, false when joining main /// ranges. @@ -1699,11 +1689,11 @@ class JoinVals { ConflictResolution Resolution; /// Lanes written by this def, 0 for unanalyzed values. - unsigned WriteLanes; + LaneBitmask WriteLanes; /// Lanes with defined values in this register. Other lanes are undef and /// safe to clobber. - unsigned ValidLanes; + LaneBitmask ValidLanes; /// Value in LI being redefined by this def. VNInfo *RedefVNI; @@ -1744,7 +1734,7 @@ class JoinVals { /// Compute the bitmask of lanes actually written by DefMI. /// Set Redef if there are any partial register definitions that depend on the /// previous value of the register. - unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; + LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; /// Find the ultimate value that VNI was copied from. std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const; @@ -1780,12 +1770,12 @@ class JoinVals { /// entry to TaintedVals. /// /// Returns false if the tainted lanes extend beyond the basic block. - bool taintExtent(unsigned, unsigned, JoinVals&, - SmallVectorImpl<std::pair<SlotIndex, unsigned> >&); + bool taintExtent(unsigned, LaneBitmask, JoinVals&, + SmallVectorImpl<std::pair<SlotIndex, LaneBitmask> >&); /// Return true if MI uses any of the given Lanes from Reg. /// This does not include partial redefinitions of Reg. 
- bool usesLanes(const MachineInstr *MI, unsigned, unsigned, unsigned) const; + bool usesLanes(const MachineInstr *MI, unsigned, unsigned, LaneBitmask) const; /// Determine if ValNo is a copy of a value number in LR or Other.LR that will /// be pruned: @@ -1796,7 +1786,7 @@ class JoinVals { bool isPrunedValue(unsigned ValNo, JoinVals &Other); public: - JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, unsigned LaneMask, + JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask, SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp, LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin, bool TrackSubRegLiveness) @@ -1822,8 +1812,8 @@ public: /// Removes subranges starting at copies that get removed. This sometimes /// happens when undefined subranges are copied around. These ranges contain - /// no usefull information and can be removed. - void pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask); + /// no useful information and can be removed. + void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask); /// Erase any machine instructions that have been coalesced away. /// Add erased instructions to ErasedInstrs. @@ -1840,9 +1830,9 @@ public: }; } // end anonymous namespace -unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) +LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const { - unsigned L = 0; + LaneBitmask L = 0; for (const MachineOperand &MO : DefMI->operands()) { if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef()) continue; @@ -1879,7 +1869,7 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain( ValueIn = nullptr; for (const LiveInterval::SubRange &S : LI.subranges()) { // Transform lanemask to a mask in the joined live interval. - unsigned SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask); + LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask); if ((SMask & LaneMask) == 0) continue; LiveQueryResult LRQ = S.Query(Def); @@ -1928,7 +1918,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { const MachineInstr *DefMI = nullptr; if (VNI->isPHIDef()) { // Conservatively assume that all lanes in a PHI are valid. - unsigned Lanes = SubRangeJoin ? 1 : TRI->getSubRegIndexLaneMask(SubIdx); + LaneBitmask Lanes = SubRangeJoin ? 1 : TRI->getSubRegIndexLaneMask(SubIdx); V.ValidLanes = V.WriteLanes = Lanes; } else { DefMI = Indexes->getInstructionFromIndex(VNI->def); @@ -2190,8 +2180,8 @@ bool JoinVals::mapValues(JoinVals &Other) { } bool JoinVals:: -taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, - SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) { +taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, + SmallVectorImpl<std::pair<SlotIndex, LaneBitmask> > &TaintExtent) { VNInfo *VNI = LR.getValNumInfo(ValNo); MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB); @@ -2230,7 +2220,7 @@ taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, } bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx, - unsigned Lanes) const { + LaneBitmask Lanes) const { if (MI->isDebugValue()) return false; for (const MachineOperand &MO : MI->operands()) { @@ -2264,8 +2254,8 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the // join, those lanes will be tainted with a wrong value. Get the extent of // the tainted lanes. 
- unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes; - SmallVector<std::pair<SlotIndex, unsigned>, 8> TaintExtent; + LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes; + SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent; if (!taintExtent(i, TaintedLanes, Other, TaintExtent)) // Tainted lanes would extend beyond the basic block. return false; @@ -2384,7 +2374,7 @@ void JoinVals::pruneValues(JoinVals &Other, } } -void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask) +void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) { // Look for values being erased. bool DidPrune = false; @@ -2401,7 +2391,7 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask) // copied and we must remove that subrange value as well. VNInfo *ValueOut = Q.valueOutOrDead(); if (ValueOut != nullptr && Q.valueIn() == nullptr) { - DEBUG(dbgs() << "\t\tPrune sublane " << format("%04X", S.LaneMask) + DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask) << " at " << Def << "\n"); LIS->pruneValue(S, Def, nullptr); DidPrune = true; @@ -2410,10 +2400,10 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask) continue; } // If a subrange ends at the copy, then a value was copied but only - // partially used later. Shrink the subregister range apropriately. + // partially used later. Shrink the subregister range appropriately. if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) { - DEBUG(dbgs() << "\t\tDead uses at sublane " - << format("%04X", S.LaneMask) << " at " << Def << "\n"); + DEBUG(dbgs() << "\t\tDead uses at sublane " << PrintLaneMask(S.LaneMask) + << " at " << Def << "\n"); ShrinkMask |= S.LaneMask; } } @@ -2477,8 +2467,8 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, } } -bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, - unsigned LaneMask, +void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, + LaneBitmask LaneMask, const CoalescerPair &CP) { SmallVector<VNInfo*, 16> NewVNInfo; JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask, @@ -2492,13 +2482,15 @@ bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, // ranges get mapped to the "overflow" lane mask bit which creates unexpected // interferences. if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) { - DEBUG(dbgs() << "*** Couldn't join subrange!\n"); - return false; + // We already determined that it is legal to merge the intervals, so this + // should never fail. + llvm_unreachable("*** Couldn't join subrange!\n"); } if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals)) { - DEBUG(dbgs() << "*** Couldn't join subrange!\n"); - return false; + // We already determined that it is legal to merge the intervals, so this + // should never fail. + llvm_unreachable("*** Couldn't join subrange!\n"); } // The merging algorithm in LiveInterval::join() can't handle conflicting @@ -2521,36 +2513,37 @@ bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n"); if (EndPoints.empty()) - return true; + return; // Recompute the parts of the live range we had to remove because of // CR_Replace conflicts. 
DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: " << LRange << '\n'); LIS->extendToIndices(LRange, EndPoints); - return true; } -bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, +void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, - unsigned LaneMask, CoalescerPair &CP) { + LaneBitmask LaneMask, + CoalescerPair &CP) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); for (LiveInterval::SubRange &R : LI.subranges()) { - unsigned RMask = R.LaneMask; + LaneBitmask RMask = R.LaneMask; // LaneMask of subregisters common to subrange R and ToMerge. - unsigned Common = RMask & LaneMask; + LaneBitmask Common = RMask & LaneMask; // There is nothing to do without common subregs. if (Common == 0) continue; - DEBUG(dbgs() << format("\t\tCopy+Merge %04X into %04X\n", RMask, Common)); + DEBUG(dbgs() << "\t\tCopy+Merge " << PrintLaneMask(RMask) << " into " + << PrintLaneMask(Common) << '\n'); // LaneMask of subregisters contained in the R range but not in ToMerge, // they have to split into their own subrange. - unsigned LRest = RMask & ~LaneMask; + LaneBitmask LRest = RMask & ~LaneMask; LiveInterval::SubRange *CommonRange; if (LRest != 0) { R.LaneMask = LRest; - DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", LRest)); + DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(LRest) << '\n'); // Duplicate SubRange for newly merged common stuff. CommonRange = LI.createSubRangeFrom(Allocator, Common, R); } else { @@ -2559,16 +2552,14 @@ bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, CommonRange = &R; } LiveRange RangeCopy(ToMerge, Allocator); - if (!joinSubRegRanges(*CommonRange, RangeCopy, Common, CP)) - return false; + joinSubRegRanges(*CommonRange, RangeCopy, Common, CP); LaneMask &= ~RMask; } if (LaneMask != 0) { - DEBUG(dbgs() << format("\t\tNew Lane %04X\n", LaneMask)); + DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask) << '\n'); LI.createSubRangeFrom(Allocator, LaneMask, ToMerge); } - return true; } bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { @@ -2602,15 +2593,15 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // create initial subranges if necessary. unsigned DstIdx = CP.getDstIdx(); if (!LHS.hasSubRanges()) { - unsigned Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask() - : TRI->getSubRegIndexLaneMask(DstIdx); + LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask() + : TRI->getSubRegIndexLaneMask(DstIdx); // LHS must support subregs or we wouldn't be in this codepath. assert(Mask != 0); LHS.createSubRangeFrom(Allocator, Mask, LHS); } else if (DstIdx != 0) { // Transform LHS lanemasks to new register class if necessary. for (LiveInterval::SubRange &R : LHS.subranges()) { - unsigned Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask); + LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask); R.LaneMask = Mask; } } @@ -2619,41 +2610,21 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // Determine lanemasks of RHS in the coalesced register and merge subranges. unsigned SrcIdx = CP.getSrcIdx(); - bool Abort = false; if (!RHS.hasSubRanges()) { - unsigned Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() - : TRI->getSubRegIndexLaneMask(SrcIdx); - if (!mergeSubRangeInto(LHS, RHS, Mask, CP)) - Abort = true; + LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() + : TRI->getSubRegIndexLaneMask(SrcIdx); + mergeSubRangeInto(LHS, RHS, Mask, CP); } else { // Pair up subranges and merge. 
for (LiveInterval::SubRange &R : RHS.subranges()) { - unsigned Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); - if (!mergeSubRangeInto(LHS, R, Mask, CP)) { - Abort = true; - break; - } + LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); + mergeSubRangeInto(LHS, R, Mask, CP); } } - if (Abort) { - // This shouldn't have happened :-( - // However we are aware of at least one existing problem where we - // can't merge subranges when multiple ranges end up in the - // "overflow bit" 32. As a workaround we drop all subregister ranges - // which means we loose some precision but are back to a well defined - // state. - assert(TargetRegisterInfo::isImpreciseLaneMask( - CP.getNewRC()->getLaneMask()) - && "SubRange merge should only fail when merging into bit 32."); - DEBUG(dbgs() << "\tSubrange join aborted!\n"); - LHS.clearSubRanges(); - RHS.clearSubRanges(); - } else { - DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); + DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); - LHSVals.pruneSubRegValues(LHS, ShrinkMask); - RHSVals.pruneSubRegValues(LHS, ShrinkMask); - } + LHSVals.pruneSubRegValues(LHS, ShrinkMask); + RHSVals.pruneSubRegValues(LHS, ShrinkMask); } // The merging algorithm in LiveInterval::join() can't handle conflicting @@ -2799,7 +2770,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { !isTerminalReg(DstReg, Copy, MRI)) return false; - // DstReg is a terminal node. Check if it inteferes with any other + // DstReg is a terminal node. Check if it interferes with any other // copy involving SrcReg. const MachineBasicBlock *OrigBB = Copy.getParent(); const LiveInterval &DstLI = LIS->getInterval(DstReg); @@ -2904,7 +2875,7 @@ void RegisterCoalescer::joinAllIntervals() { std::vector<MBBPriorityInfo> MBBs; MBBs.reserve(MF->size()); for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){ - MachineBasicBlock *MBB = I; + MachineBasicBlock *MBB = &*I; MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB), JoinSplitEdges && isSplitEdge(MBB))); } @@ -2943,7 +2914,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { TRI = STI.getRegisterInfo(); TII = STI.getInstrInfo(); LIS = &getAnalysis<LiveIntervals>(); - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); Loops = &getAnalysis<MachineLoopInfo>(); if (EnableGlobalCopies == cl::BOU_UNSET) JoinGlobalCopies = STI.enableJoinGlobalCopies(); @@ -2981,22 +2952,25 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { if (MRI->recomputeRegClass(Reg)) { DEBUG(dbgs() << PrintReg(Reg) << " inflated to " << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n'); + ++NumInflated; + LiveInterval &LI = LIS->getInterval(Reg); - unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg); - if (MaxMask == 0) { + if (LI.hasSubRanges()) { // If the inflated register class does not support subregisters anymore // remove the subranges. - LI.clearSubRanges(); - } else { + if (!MRI->shouldTrackSubRegLiveness(Reg)) { + LI.clearSubRanges(); + } else { #ifndef NDEBUG - // If subranges are still supported, then the same subregs should still - // be supported. - for (LiveInterval::SubRange &S : LI.subranges()) { - assert ((S.LaneMask & ~MaxMask) == 0); - } + LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg); + // If subranges are still supported, then the same subregs + // should still be supported. 
+ for (LiveInterval::SubRange &S : LI.subranges()) { + assert((S.LaneMask & ~MaxMask) == 0); + } #endif + } } - ++NumInflated; } } diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp index c3786e552a13..8382b0912bde 100644 --- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp @@ -59,12 +59,12 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << "Max Pressure: "; dumpRegSetPressure(MaxSetPressure, TRI); dbgs() << "Live In: "; - for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i) - dbgs() << PrintVRegOrUnit(LiveInRegs[i], TRI) << " "; + for (unsigned Reg : LiveInRegs) + dbgs() << PrintVRegOrUnit(Reg, TRI) << " "; dbgs() << '\n'; dbgs() << "Live Out: "; - for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i) - dbgs() << PrintVRegOrUnit(LiveOutRegs[i], TRI) << " "; + for (unsigned Reg : LiveOutRegs) + dbgs() << PrintVRegOrUnit(Reg, TRI) << " "; dbgs() << '\n'; } @@ -78,11 +78,13 @@ void RegPressureTracker::dump() const { } void PressureDiff::dump(const TargetRegisterInfo &TRI) const { + const char *sep = ""; for (const PressureChange &Change : *this) { - if (!Change.isValid() || Change.getUnitInc() == 0) - continue; - dbgs() << " " << TRI.getRegPressureSetName(Change.getPSet()) + if (!Change.isValid()) + break; + dbgs() << sep << TRI.getRegPressureSetName(Change.getPSet()) << " " << Change.getUnitInc(); + sep = " "; } dbgs() << '\n'; } @@ -90,8 +92,8 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const { /// Increase the current pressure as impacted by these registers and bump /// the high water mark if needed. void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) { - for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { - PSetIterator PSetI = MRI->getPressureSets(RegUnits[i]); + for (unsigned RegUnit : RegUnits) { + PSetIterator PSetI = MRI->getPressureSets(RegUnit); unsigned Weight = PSetI.getWeight(); for (; PSetI.isValid(); ++PSetI) { CurrSetPressure[*PSetI] += Weight; @@ -104,8 +106,8 @@ void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) { /// Simply decrease the current pressure as impacted by these registers. void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> RegUnits) { - for (unsigned I = 0, E = RegUnits.size(); I != E; ++I) - decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnits[I])); + for (unsigned RegUnit : RegUnits) + decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnit)); } /// Clear the result so it can be used for another round of pressure tracking. 
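Two recurring cleanups in the coalescer and PBQP hunks above are (a) giving lane masks their own LaneBitmask type and (b) printing them — and PBQP node info — through Printable-style objects instead of format("%04X", ...) or a dedicated helper class. A rough standalone sketch of the lazy-printing idiom follows; it assumes only the general shape of LLVM's Printable (which wraps raw_ostream rather than std::ostream), and printLaneMask here is an illustrative stand-in.

    #include <cstdio>
    #include <functional>
    #include <iostream>
    #include <utility>

    // "Print later" wrapper: captures a closure and runs it when the object
    // is streamed, so call sites can simply write OS << printLaneMask(Mask).
    class Printable {
      std::function<void(std::ostream &)> Print;

    public:
      explicit Printable(std::function<void(std::ostream &)> P)
          : Print(std::move(P)) {}
      friend std::ostream &operator<<(std::ostream &OS, const Printable &P) {
        P.Print(OS);
        return OS;
      }
    };

    using LaneBitmask = unsigned; // stand-in for a dedicated lane-mask type

    static Printable printLaneMask(LaneBitmask Mask) {
      return Printable([Mask](std::ostream &OS) {
        char Buf[16];
        std::snprintf(Buf, sizeof(Buf), "%08X", Mask);
        OS << Buf;
      });
    }

    int main() {
      LaneBitmask Mask = 0x0000000F;
      std::cout << "Reduce Lane to " << printLaneMask(Mask) << '\n';
      return 0;
    }

The call site only pays for capturing the mask; the formatting work happens when — and only if — the value is actually streamed.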
@@ -157,10 +159,22 @@ void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) { LiveInRegs.clear(); } -const LiveRange *RegPressureTracker::getLiveRange(unsigned Reg) const { +void LiveRegSet::init(const MachineRegisterInfo &MRI) { + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + unsigned NumRegUnits = TRI.getNumRegs(); + unsigned NumVirtRegs = MRI.getNumVirtRegs(); + Regs.setUniverse(NumRegUnits + NumVirtRegs); + this->NumRegUnits = NumRegUnits; +} + +void LiveRegSet::clear() { + Regs.clear(); +} + +static const LiveRange *getLiveRange(const LiveIntervals &LIS, unsigned Reg) { if (TargetRegisterInfo::isVirtualRegister(Reg)) - return &LIS->getInterval(Reg); - return LIS->getCachedRegUnit(Reg); + return &LIS.getInterval(Reg); + return LIS.getCachedRegUnit(Reg); } void RegPressureTracker::reset() { @@ -176,8 +190,7 @@ void RegPressureTracker::reset() { else static_cast<RegionPressure&>(P).reset(); - LiveRegs.PhysRegs.clear(); - LiveRegs.VirtRegs.clear(); + LiveRegs.clear(); UntiedDefs.clear(); } @@ -210,8 +223,7 @@ void RegPressureTracker::init(const MachineFunction *mf, P.MaxSetPressure = CurrSetPressure; - LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs()); - LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs()); + LiveRegs.init(*MRI); if (TrackUntiedDefs) UntiedDefs.setUniverse(MRI->getNumVirtRegs()); } @@ -250,14 +262,8 @@ void RegPressureTracker::closeTop() { static_cast<RegionPressure&>(P).TopPos = CurrPos; assert(P.LiveInRegs.empty() && "inconsistent max pressure result"); - P.LiveInRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size()); - P.LiveInRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end()); - for (SparseSet<unsigned>::const_iterator I = - LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I) - P.LiveInRegs.push_back(*I); - std::sort(P.LiveInRegs.begin(), P.LiveInRegs.end()); - P.LiveInRegs.erase(std::unique(P.LiveInRegs.begin(), P.LiveInRegs.end()), - P.LiveInRegs.end()); + P.LiveInRegs.reserve(LiveRegs.size()); + LiveRegs.appendTo(P.LiveInRegs); } /// Set the boundary for the bottom of the region and summarize live outs. @@ -268,21 +274,14 @@ void RegPressureTracker::closeBottom() { static_cast<RegionPressure&>(P).BottomPos = CurrPos; assert(P.LiveOutRegs.empty() && "inconsistent max pressure result"); - P.LiveOutRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size()); - P.LiveOutRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end()); - for (SparseSet<unsigned>::const_iterator I = - LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I) - P.LiveOutRegs.push_back(*I); - std::sort(P.LiveOutRegs.begin(), P.LiveOutRegs.end()); - P.LiveOutRegs.erase(std::unique(P.LiveOutRegs.begin(), P.LiveOutRegs.end()), - P.LiveOutRegs.end()); + P.LiveOutRegs.reserve(LiveRegs.size()); + LiveRegs.appendTo(P.LiveOutRegs); } /// Finalize the region boundaries and record live ins and live outs. 
void RegPressureTracker::closeRegion() { if (!isTopClosed() && !isBottomClosed()) { - assert(LiveRegs.PhysRegs.empty() && LiveRegs.VirtRegs.empty() && - "no region boundary"); + assert(LiveRegs.size() == 0 && "no region boundary"); return; } if (!isBottomClosed()) @@ -299,8 +298,7 @@ void RegPressureTracker::closeRegion() { void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0); assert(isBottomClosed() && "need bottom-up tracking to intialize."); - for (unsigned i = 0, e = P.LiveOutRegs.size(); i < e; ++i) { - unsigned Reg = P.LiveOutRegs[i]; + for (unsigned Reg : P.LiveOutRegs) { if (TargetRegisterInfo::isVirtualRegister(Reg) && !RPTracker.hasUntiedDef(Reg)) { increaseSetPressure(LiveThruPressure, MRI->getPressureSets(Reg)); @@ -315,71 +313,113 @@ static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) { } namespace { -/// Collect this instruction's unique uses and defs into SmallVectors for -/// processing defs and uses in order. -/// -/// FIXME: always ignore tied opers -class RegisterOperands { - const TargetRegisterInfo *TRI; - const MachineRegisterInfo *MRI; - bool IgnoreDead; +/// List of register defined and used by a machine instruction. +class RegisterOperands { public: SmallVector<unsigned, 8> Uses; SmallVector<unsigned, 8> Defs; SmallVector<unsigned, 8> DeadDefs; - RegisterOperands(const TargetRegisterInfo *tri, - const MachineRegisterInfo *mri, bool ID = false): - TRI(tri), MRI(mri), IgnoreDead(ID) {} + void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI, bool IgnoreDead = false); + + /// Use liveness information to find dead defs not marked with a dead flag + /// and move them to the DeadDefs vector. + void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS); +}; + +/// Collect this instruction's unique uses and defs into SmallVectors for +/// processing defs and uses in order. +/// +/// FIXME: always ignore tied opers +class RegisterOperandsCollector { + RegisterOperands &RegOpers; + const TargetRegisterInfo &TRI; + const MachineRegisterInfo &MRI; + bool IgnoreDead; + + RegisterOperandsCollector(RegisterOperands &RegOpers, + const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI, + bool IgnoreDead) + : RegOpers(RegOpers), TRI(TRI), MRI(MRI), IgnoreDead(IgnoreDead) {} + + void collectInstr(const MachineInstr &MI) const { + for (ConstMIBundleOperands OperI(&MI); OperI.isValid(); ++OperI) + collectOperand(*OperI); + + // Remove redundant physreg dead defs. + SmallVectorImpl<unsigned>::iterator I = + std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(), + std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs)); + RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end()); + } - /// Push this operand's register onto the correct vector. - void collect(const MachineOperand &MO) { + /// Push this operand's register onto the correct vectors. 
+ void collectOperand(const MachineOperand &MO) const { if (!MO.isReg() || !MO.getReg()) return; + unsigned Reg = MO.getReg(); if (MO.readsReg()) - pushRegUnits(MO.getReg(), Uses); + pushRegUnits(Reg, RegOpers.Uses); if (MO.isDef()) { if (MO.isDead()) { if (!IgnoreDead) - pushRegUnits(MO.getReg(), DeadDefs); - } - else - pushRegUnits(MO.getReg(), Defs); + pushRegUnits(Reg, RegOpers.DeadDefs); + } else + pushRegUnits(Reg, RegOpers.Defs); } } -protected: - void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &RegUnits) { + void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &RegUnits) const { if (TargetRegisterInfo::isVirtualRegister(Reg)) { if (containsReg(RegUnits, Reg)) return; RegUnits.push_back(Reg); - } - else if (MRI->isAllocatable(Reg)) { - for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + } else if (MRI.isAllocatable(Reg)) { + for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) { if (containsReg(RegUnits, *Units)) continue; RegUnits.push_back(*Units); } } } -}; -} // namespace -/// Collect physical and virtual register operands. -static void collectOperands(const MachineInstr *MI, - RegisterOperands &RegOpers) { - for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) - RegOpers.collect(*OperI); + friend class RegisterOperands; +}; - // Remove redundant physreg dead defs. - SmallVectorImpl<unsigned>::iterator I = - std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(), - std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs)); - RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end()); +void RegisterOperands::collect(const MachineInstr &MI, + const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI, + bool IgnoreDead) { + RegisterOperandsCollector Collector(*this, TRI, MRI, IgnoreDead); + Collector.collectInstr(MI); +} + +void RegisterOperands::detectDeadDefs(const MachineInstr &MI, + const LiveIntervals &LIS) { + SlotIndex SlotIdx = LIS.getInstructionIndex(&MI); + for (SmallVectorImpl<unsigned>::iterator RI = Defs.begin(); + RI != Defs.end(); /*empty*/) { + unsigned Reg = *RI; + const LiveRange *LR = getLiveRange(LIS, Reg); + if (LR != nullptr) { + LiveQueryResult LRQ = LR->Query(SlotIdx); + if (LRQ.isDeadDef()) { + // LiveIntervals knows this is a dead even though it's MachineOperand is + // not flagged as such. + DeadDefs.push_back(Reg); + RI = Defs.erase(RI); + continue; + } + } + ++RI; + } } +} // namespace + /// Initialize an array of N PressureDiffs. void PressureDiffs::init(unsigned N) { Size = N; @@ -399,7 +439,7 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec, int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight(); for (; PSetI.isValid(); ++PSetI) { // Find an existing entry in the pressure diff for this PSet. - PressureDiff::iterator I = begin(), E = end(); + PressureDiff::iterator I = nonconst_begin(), E = nonconst_end(); for (; I != E && I->isValid(); ++I) { if (I->getPSet() >= *PSetI) break; @@ -411,10 +451,20 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec, if (!I->isValid() || I->getPSet() != *PSetI) { PressureChange PTmp = PressureChange(*PSetI); for (PressureDiff::iterator J = I; J != E && PTmp.isValid(); ++J) - std::swap(*J,PTmp); + std::swap(*J, PTmp); } // Update the units for this pressure set. 
- I->setUnitInc(I->getUnitInc() + Weight); + unsigned NewUnitInc = I->getUnitInc() + Weight; + if (NewUnitInc != 0) { + I->setUnitInc(NewUnitInc); + } else { + // Remove entry + PressureDiff::iterator J; + for (J = std::next(I); J != E && J->isValid(); ++J, ++I) + *I = *J; + if (J != E) + *I = *J; + } } } @@ -423,18 +473,18 @@ static void collectPDiff(PressureDiff &PDiff, RegisterOperands &RegOpers, const MachineRegisterInfo *MRI) { assert(!PDiff.begin()->isValid() && "stale PDiff"); - for (unsigned i = 0, e = RegOpers.Defs.size(); i != e; ++i) - PDiff.addPressureChange(RegOpers.Defs[i], true, MRI); + for (unsigned Reg : RegOpers.Defs) + PDiff.addPressureChange(Reg, true, MRI); - for (unsigned i = 0, e = RegOpers.Uses.size(); i != e; ++i) - PDiff.addPressureChange(RegOpers.Uses[i], false, MRI); + for (unsigned Reg : RegOpers.Uses) + PDiff.addPressureChange(Reg, false, MRI); } /// Force liveness of registers. void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) { - for (unsigned i = 0, e = Regs.size(); i != e; ++i) { - if (LiveRegs.insert(Regs[i])) - increaseRegPressure(Regs[i]); + for (unsigned Reg : Regs) { + if (LiveRegs.insert(Reg)) + increaseRegPressure(Reg); } } @@ -465,13 +515,9 @@ void RegPressureTracker::discoverLiveOut(unsigned Reg) { /// registers that are both defined and used by the instruction. If a pressure /// difference pointer is provided record the changes is pressure caused by this /// instruction independent of liveness. -bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses, +void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses, PressureDiff *PDiff) { - // Check for the top of the analyzable region. - if (CurrPos == MBB->begin()) { - closeRegion(); - return false; - } + assert(CurrPos != MBB->begin()); if (!isBottomClosed()) closeBottom(); @@ -483,11 +529,8 @@ bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses, do --CurrPos; while (CurrPos != MBB->begin() && CurrPos->isDebugValue()); + assert(!CurrPos->isDebugValue()); - if (CurrPos->isDebugValue()) { - closeRegion(); - return false; - } SlotIndex SlotIdx; if (RequireIntervals) SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); @@ -496,8 +539,11 @@ bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses, if (RequireIntervals && isTopClosed()) static_cast<IntervalPressure&>(P).openTop(SlotIdx); - RegisterOperands RegOpers(TRI, MRI); - collectOperands(CurrPos, RegOpers); + const MachineInstr &MI = *CurrPos; + RegisterOperands RegOpers; + RegOpers.collect(MI, *TRI, *MRI); + if (RequireIntervals) + RegOpers.detectDeadDefs(MI, *LIS); if (PDiff) collectPDiff(*PDiff, RegOpers, MRI); @@ -508,37 +554,19 @@ bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses, // Kill liveness at live defs. // TODO: consider earlyclobbers? - for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = RegOpers.Defs[i]; - bool DeadDef = false; - if (RequireIntervals) { - const LiveRange *LR = getLiveRange(Reg); - if (LR) { - LiveQueryResult LRQ = LR->Query(SlotIdx); - DeadDef = LRQ.isDeadDef(); - } - } - if (DeadDef) { - // LiveIntervals knows this is a dead even though it's MachineOperand is - // not flagged as such. Since this register will not be recorded as - // live-out, increase its PDiff value to avoid underflowing pressure. 
- if (PDiff) - PDiff->addPressureChange(Reg, false, MRI); - } else { - if (LiveRegs.erase(Reg)) - decreaseRegPressure(Reg); - else - discoverLiveOut(Reg); - } + for (unsigned Reg : RegOpers.Defs) { + if (LiveRegs.erase(Reg)) + decreaseRegPressure(Reg); + else + discoverLiveOut(Reg); } // Generate liveness for uses. - for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = RegOpers.Uses[i]; + for (unsigned Reg : RegOpers.Uses) { if (!LiveRegs.contains(Reg)) { // Adjust liveouts if LiveIntervals are available. if (RequireIntervals) { - const LiveRange *LR = getLiveRange(Reg); + const LiveRange *LR = getLiveRange(*LIS, Reg); if (LR) { LiveQueryResult LRQ = LR->Query(SlotIdx); if (!LRQ.isKill() && !LRQ.valueDefined()) @@ -552,24 +580,18 @@ bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses, } } if (TrackUntiedDefs) { - for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = RegOpers.Defs[i]; + for (unsigned Reg : RegOpers.Defs) { if (TargetRegisterInfo::isVirtualRegister(Reg) && !LiveRegs.contains(Reg)) UntiedDefs.insert(Reg); } } - return true; } /// Advance across the current instruction. -bool RegPressureTracker::advance() { +void RegPressureTracker::advance() { assert(!TrackUntiedDefs && "unsupported mode"); - // Check for the bottom of the analyzable region. - if (CurrPos == MBB->end()) { - closeRegion(); - return false; - } + assert(CurrPos != MBB->end()); if (!isTopClosed()) closeTop(); @@ -585,11 +607,10 @@ bool RegPressureTracker::advance() { static_cast<RegionPressure&>(P).openBottom(CurrPos); } - RegisterOperands RegOpers(TRI, MRI); - collectOperands(CurrPos, RegOpers); + RegisterOperands RegOpers; + RegOpers.collect(*CurrPos, *TRI, *MRI); - for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = RegOpers.Uses[i]; + for (unsigned Reg : RegOpers.Uses) { // Discover live-ins. bool isLive = LiveRegs.contains(Reg); if (!isLive) @@ -597,24 +618,21 @@ bool RegPressureTracker::advance() { // Kill liveness at last uses. bool lastUse = false; if (RequireIntervals) { - const LiveRange *LR = getLiveRange(Reg); + const LiveRange *LR = getLiveRange(*LIS, Reg); lastUse = LR && LR->Query(SlotIdx).isKill(); - } - else { + } else { // Allocatable physregs are always single-use before register rewriting. lastUse = !TargetRegisterInfo::isVirtualRegister(Reg); } if (lastUse && isLive) { LiveRegs.erase(Reg); decreaseRegPressure(Reg); - } - else if (!lastUse && !isLive) + } else if (!lastUse && !isLive) increaseRegPressure(Reg); } // Generate liveness for defs. - for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = RegOpers.Defs[i]; + for (unsigned Reg : RegOpers.Defs) { if (LiveRegs.insert(Reg)) increaseRegPressure(Reg); } @@ -627,7 +645,6 @@ bool RegPressureTracker::advance() { do ++CurrPos; while (CurrPos != MBB->end() && CurrPos->isDebugValue()); - return true; } /// Find the max change in excess pressure across all sets. @@ -653,8 +670,7 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, PDiff = 0; // Under the limit else PDiff = PNew - Limit; // Just exceeded limit. - } - else if (Limit > PNew) + } else if (Limit > PNew) PDiff = Limit - POld; // Just obeyed limit. if (PDiff) { @@ -719,34 +735,19 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { assert(!MI->isDebugValue() && "Expect a nondebug instruction."); // Account for register pressure similar to RegPressureTracker::recede(). 
- RegisterOperands RegOpers(TRI, MRI, /*IgnoreDead=*/true); - collectOperands(MI, RegOpers); - - // Boost max pressure for all dead defs together. - // Since CurrSetPressure and MaxSetPressure - increaseRegPressure(RegOpers.DeadDefs); - decreaseRegPressure(RegOpers.DeadDefs); + RegisterOperands RegOpers; + RegOpers.collect(*MI, *TRI, *MRI, /*IgnoreDead=*/true); + assert(RegOpers.DeadDefs.size() == 0); + if (RequireIntervals) + RegOpers.detectDeadDefs(*MI, *LIS); // Kill liveness at live defs. - for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = RegOpers.Defs[i]; - bool DeadDef = false; - if (RequireIntervals) { - const LiveRange *LR = getLiveRange(Reg); - if (LR) { - SlotIndex SlotIdx = LIS->getInstructionIndex(MI); - LiveQueryResult LRQ = LR->Query(SlotIdx); - DeadDef = LRQ.isDeadDef(); - } - } - if (!DeadDef) { - if (!containsReg(RegOpers.Uses, Reg)) - decreaseRegPressure(Reg); - } + for (unsigned Reg : RegOpers.Defs) { + if (!containsReg(RegOpers.Uses, Reg)) + decreaseRegPressure(Reg); } // Generate liveness for uses. - for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = RegOpers.Uses[i]; + for (unsigned Reg : RegOpers.Uses) { if (!LiveRegs.contains(Reg)) increaseRegPressure(Reg); } @@ -853,7 +854,8 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff, unsigned MNew = MOld; // Ignore DeadDefs here because they aren't captured by PressureChange. unsigned PNew = POld + PDiffI->getUnitInc(); - assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld) && "PSet overflow"); + assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld) + && "PSet overflow/underflow"); if (PNew > MOld) MNew = PNew; // Check if current pressure has exceeded the limit. @@ -892,19 +894,13 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff, } /// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx). -static bool findUseBetween(unsigned Reg, - SlotIndex PriorUseIdx, SlotIndex NextUseIdx, - const MachineRegisterInfo *MRI, +static bool findUseBetween(unsigned Reg, SlotIndex PriorUseIdx, + SlotIndex NextUseIdx, const MachineRegisterInfo &MRI, const LiveIntervals *LIS) { - for (MachineRegisterInfo::use_instr_nodbg_iterator - UI = MRI->use_instr_nodbg_begin(Reg), - UE = MRI->use_instr_nodbg_end(); UI != UE; ++UI) { - const MachineInstr* MI = &*UI; - if (MI->isDebugValue()) - continue; - SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot(); - if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) - return true; + for (const MachineInstr &MI : MRI.use_nodbg_instructions(Reg)) { + SlotIndex InstSlot = LIS->getInstructionIndex(&MI).getRegSlot(); + if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) + return true; } return false; } @@ -919,8 +915,8 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { assert(!MI->isDebugValue() && "Expect a nondebug instruction."); // Account for register pressure similar to RegPressureTracker::recede(). - RegisterOperands RegOpers(TRI, MRI); - collectOperands(MI, RegOpers); + RegisterOperands RegOpers; + RegOpers.collect(*MI, *TRI, *MRI); // Kill liveness at last uses. Assume allocatable physregs are single-use // rather than checking LiveIntervals. 
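In the RegisterPressure.cpp hunks above, dead-def handling moves out of recede() and bumpUpwardPressure() into RegisterOperands::detectDeadDefs(), which walks the collected Defs and transfers any register whose liveness query reports a dead def into DeadDefs. A simplified standalone version of that transfer loop is shown below; isDeadAt() stands in for the LiveRange::Query(SlotIdx).isDeadDef() check, and the register numbers are made up.

    #include <cstdio>
    #include <vector>

    // Stand-in for LiveRange::Query(SlotIdx).isDeadDef() on this instruction.
    static bool isDeadAt(unsigned Reg) { return Reg == 7 || Reg == 9; }

    struct RegisterOperands {
      std::vector<unsigned> Defs;
      std::vector<unsigned> DeadDefs;

      // Move defs that liveness reports as dead (even if the operand carries
      // no <dead> flag) from Defs into DeadDefs, keeping the order of the rest.
      void detectDeadDefs() {
        for (std::vector<unsigned>::iterator RI = Defs.begin();
             RI != Defs.end(); /*empty*/) {
          if (isDeadAt(*RI)) {
            DeadDefs.push_back(*RI);
            RI = Defs.erase(RI); // erase() hands back the next valid iterator
          } else {
            ++RI;
          }
        }
      }
    };

    int main() {
      RegisterOperands Ops;
      Ops.Defs = {5, 7, 8, 9};
      Ops.detectDeadDefs();
      for (unsigned Reg : Ops.Defs)
        std::printf("live def: vreg%u\n", Reg);
      for (unsigned Reg : Ops.DeadDefs)
        std::printf("dead def: vreg%u\n", Reg);
      return 0;
    }

Letting erase() return the next valid iterator keeps the loop well-formed while elements are removed from the vector being walked.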
@@ -928,21 +924,18 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { if (RequireIntervals) SlotIdx = LIS->getInstructionIndex(MI).getRegSlot(); - for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = RegOpers.Uses[i]; + for (unsigned Reg : RegOpers.Uses) { if (RequireIntervals) { // FIXME: allow the caller to pass in the list of vreg uses that remain // to be bottom-scheduled to avoid searching uses at each query. SlotIndex CurrIdx = getCurrSlot(); - const LiveRange *LR = getLiveRange(Reg); + const LiveRange *LR = getLiveRange(*LIS, Reg); if (LR) { LiveQueryResult LRQ = LR->Query(SlotIdx); - if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) { + if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, *MRI, LIS)) decreaseRegPressure(Reg); - } } - } - else if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + } else if (!TargetRegisterInfo::isVirtualRegister(Reg)) { // Allocatable physregs are always single-use before register rewriting. decreaseRegPressure(Reg); } @@ -966,7 +959,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { /// This is expensive for an on-the-fly query because it calls /// bumpDownwardPressure to recompute the pressure sets based on current /// liveness. We don't yet have a fast version of downward pressure tracking -/// analagous to getUpwardPressureDelta. +/// analogous to getUpwardPressureDelta. void RegPressureTracker:: getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef<PressureChange> CriticalPSets, diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp index 4176686d1f7f..8fa1bf74b7e2 100644 --- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -31,9 +31,12 @@ using namespace llvm; #define DEBUG_TYPE "reg-scavenging" /// setUsed - Set the register units of this register as used. -void RegScavenger::setRegUsed(unsigned Reg) { - for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) - RegUnitsAvailable.reset(*RUI); +void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) { + for (MCRegUnitMaskIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) { + LaneBitmask UnitMask = (*RUI).second; + if (UnitMask == 0 || (LaneMask & UnitMask) != 0) + RegUnitsAvailable.reset((*RUI).first); + } } void RegScavenger::initRegState() { @@ -50,9 +53,8 @@ void RegScavenger::initRegState() { return; // Live-in registers are in use. - for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), - E = MBB->livein_end(); I != E; ++I) - setRegUsed(*I); + for (const auto &LI : MBB->liveins()) + setRegUsed(LI.PhysReg, LI.LaneMask); // Pristine CSRs are also unavailable. 
const MachineFunction &MF = *MBB->getParent(); diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp index 76a7fef58fcc..efde61ece639 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -372,7 +372,6 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { dbgs() << "\n"; } } - dbgs() << "\n"; } #endif diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 390b6d25954e..fb82ab7a5555 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -13,12 +13,12 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/ADT/IntEqClasses.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -51,15 +51,11 @@ static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, - bool IsPostRAFlag, bool RemoveKillFlags, - LiveIntervals *lis) - : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis), - IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags), - CanHandleTerminators(false), FirstDbgValue(nullptr) { - assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); + bool RemoveKillFlags) + : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), + RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false), + TrackLaneMasks(false), FirstDbgValue(nullptr) { DbgValues.clear(); - assert(!(IsPostRA && MRI.getNumVirtRegs()) && - "Virtual registers must be removed prior to PostRA scheduling"); const TargetSubtargetInfo &ST = mf.getSubtarget(); SchedModel.init(ST.getSchedModel(), &ST, TII); @@ -230,11 +226,8 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { if (TRI->isPhysicalRegister(Reg)) Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); - else { - assert(!IsPostRA && "Virtual register encountered after regalloc."); - if (MO.readsReg()) // ignore undef operands - addVRegUseDeps(&ExitSU, i); - } + else if (MO.readsReg()) // ignore undef operands + addVRegUseDeps(&ExitSU, i); } } else { // For others, e.g. fallthrough, conditional branch, assume the exit @@ -242,11 +235,9 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { assert(Uses.empty() && "Uses in set before adding deps?"); for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - if (!Uses.contains(Reg)) - Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); + for (const auto &LI : (*SI)->liveins()) { + if (!Uses.contains(LI.PhysReg)) + Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg)); } } } @@ -371,6 +362,20 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { } } +LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const +{ + unsigned Reg = MO.getReg(); + // No point in tracking lanemasks if we don't have interesting subregisters. 
+ const TargetRegisterClass &RC = *MRI.getRegClass(Reg); + if (!RC.HasDisjunctSubRegs) + return ~0u; + + unsigned SubReg = MO.getSubReg(); + if (SubReg == 0) + return RC.getLaneMask(); + return TRI->getSubRegIndexLaneMask(SubReg); +} + /// addVRegDefDeps - Add register output and data dependencies from this SUnit /// to instructions that occur later in the same scheduling region if they read /// from or write to the virtual register defined at OperIdx. @@ -378,35 +383,106 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { /// TODO: Hoist loop induction variable increments. This has to be /// reevaluated. Generally, IV scheduling should be done before coalescing. void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { - const MachineInstr *MI = SU->getInstr(); - unsigned Reg = MI->getOperand(OperIdx).getReg(); + MachineInstr *MI = SU->getInstr(); + MachineOperand &MO = MI->getOperand(OperIdx); + unsigned Reg = MO.getReg(); + + LaneBitmask DefLaneMask; + LaneBitmask KillLaneMask; + if (TrackLaneMasks) { + bool IsKill = MO.getSubReg() == 0 || MO.isUndef(); + DefLaneMask = getLaneMaskForMO(MO); + // If we have a <read-undef> flag, none of the lane values comes from an + // earlier instruction. + KillLaneMask = IsKill ? ~0u : DefLaneMask; + + // Clear undef flag, we'll re-add it later once we know which subregister + // Def is first. + MO.setIsUndef(false); + } else { + DefLaneMask = ~0u; + KillLaneMask = ~0u; + } + + if (MO.isDead()) { + assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() && + "Dead defs should have no uses"); + } else { + // Add data dependence to all uses we found so far. + const TargetSubtargetInfo &ST = MF.getSubtarget(); + for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg), + E = CurrentVRegUses.end(); I != E; /*empty*/) { + LaneBitmask LaneMask = I->LaneMask; + // Ignore uses of other lanes. + if ((LaneMask & KillLaneMask) == 0) { + ++I; + continue; + } + + if ((LaneMask & DefLaneMask) != 0) { + SUnit *UseSU = I->SU; + MachineInstr *Use = UseSU->getInstr(); + SDep Dep(SU, SDep::Data, Reg); + Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use, + I->OperandIndex)); + ST.adjustSchedDependency(SU, UseSU, Dep); + UseSU->addPred(Dep); + } + + LaneMask &= ~KillLaneMask; + // If we found a Def for all lanes of this use, remove it from the list. + if (LaneMask != 0) { + I->LaneMask = LaneMask; + ++I; + } else + I = CurrentVRegUses.erase(I); + } + } - // Singly defined vregs do not have output/anti dependencies. - // The current operand is a def, so we have at least one. - // Check here if there are any others... + // Shortcut: Singly defined vregs do not have output/anti dependencies. if (MRI.hasOneDef(Reg)) return; - // Add output dependence to the next nearest def of this vreg. + // Add output dependence to the next nearest defs of this vreg. // // Unless this definition is dead, the output dependence should be // transitively redundant with antidependencies from this definition's // uses. We're conservative for now until we have a way to guarantee the uses // are not eliminated sometime during scheduling. The output dependence edge // is also useful if output latency exceeds def-use latency. 
- VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); - if (DefI == VRegDefs.end()) - VRegDefs.insert(VReg2SUnit(Reg, SU)); - else { - SUnit *DefSU = DefI->SU; - if (DefSU != SU && DefSU != &ExitSU) { - SDep Dep(SU, SDep::Output, Reg); - Dep.setLatency( - SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); - DefSU->addPred(Dep); - } - DefI->SU = SU; + LaneBitmask LaneMask = DefLaneMask; + for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), + CurrentVRegDefs.end())) { + // Ignore defs for other lanes. + if ((V2SU.LaneMask & LaneMask) == 0) + continue; + // Add an output dependence. + SUnit *DefSU = V2SU.SU; + // Ignore additional defs of the same lanes in one instruction. This can + // happen because lanemasks are shared for targets with too many + // subregisters. We also use some representration tricks/hacks where we + // add super-register defs/uses, to imply that although we only access parts + // of the reg we care about the full one. + if (DefSU == SU) + continue; + SDep Dep(SU, SDep::Output, Reg); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); + DefSU->addPred(Dep); + + // Update current definition. This can get tricky if the def was about a + // bigger lanemask before. We then have to shrink it and create a new + // VReg2SUnit for the non-overlapping part. + LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask; + LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask; + if (NonOverlapMask != 0) + CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, V2SU.SU)); + V2SU.SU = SU; + V2SU.LaneMask = OverlapMask; } + // If there was no CurrentVRegDefs entry for some lanes yet, create one. + if (LaneMask != 0) + CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU)); } /// addVRegUseDeps - Add a register data dependency if the instruction that @@ -416,59 +492,34 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { /// /// TODO: Handle ExitSU "uses" properly. void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { - MachineInstr *MI = SU->getInstr(); - unsigned Reg = MI->getOperand(OperIdx).getReg(); + const MachineInstr *MI = SU->getInstr(); + const MachineOperand &MO = MI->getOperand(OperIdx); + unsigned Reg = MO.getReg(); + + // Remember the use. Data dependencies will be added when we find the def. + LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u; + CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU)); + + // Add antidependences to the following defs of the vreg. + for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), + CurrentVRegDefs.end())) { + // Ignore defs for unrelated lanes. + LaneBitmask PrevDefLaneMask = V2SU.LaneMask; + if ((PrevDefLaneMask & LaneMask) == 0) + continue; + if (V2SU.SU == SU) + continue; - // Record this local VReg use. - VReg2UseMap::iterator UI = VRegUses.find(Reg); - for (; UI != VRegUses.end(); ++UI) { - if (UI->SU == SU) - break; + V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg)); } - if (UI == VRegUses.end()) - VRegUses.insert(VReg2SUnit(Reg, SU)); - - // Lookup this operand's reaching definition. - assert(LIS && "vreg dependencies requires LiveIntervals"); - LiveQueryResult LRQ - = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI)); - VNInfo *VNI = LRQ.valueIn(); - - // VNI will be valid because MachineOperand::readsReg() is checked by caller. 
- assert(VNI && "No value to read by operand"); - MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def); - // Phis and other noninstructions (after coalescing) have a NULL Def. - if (Def) { - SUnit *DefSU = getSUnit(Def); - if (DefSU) { - // The reaching Def lives within this scheduling region. - // Create a data dependence. - SDep dep(DefSU, SDep::Data, Reg); - // Adjust the dependence latency using operand def/use information, then - // allow the target to perform its own adjustments. - int DefOp = Def->findRegisterDefOperandIdx(Reg); - dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx)); - - const TargetSubtargetInfo &ST = MF.getSubtarget(); - ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); - SU->addPred(dep); - } - } - - // Add antidependence to the following def of the vreg it uses. - VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); - if (DefI != VRegDefs.end() && DefI->SU != SU) - DefI->SU->addPred(SDep(SU, SDep::Anti, Reg)); } /// Return true if MI is an instruction we are unable to reason about /// (like a call or something with unmodeled side effects). static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { - if (MI->isCall() || MI->hasUnmodeledSideEffects() || - (MI->hasOrderedMemoryRef() && - (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) - return true; - return false; + return MI->isCall() || MI->hasUnmodeledSideEffects() || + (MI->hasOrderedMemoryRef() && + (!MI->mayLoad() || !MI->isInvariantLoad(AA))); } // This MI might have either incomplete info, or known to be unsafe @@ -508,7 +559,7 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, return false; } -/// This returns true if the two MIs need a chain edge betwee them. +/// This returns true if the two MIs need a chain edge between them. /// If these are not even memory operations, we still may need /// chain deps between them. The question really is - could /// these two MIs be reordered during scheduling from memory dependency @@ -670,7 +721,7 @@ static inline void addChainDependency(AliasAnalysis *AA, unsigned TrueMemOrderLatency = 0, bool isNormalMemory = false) { // If this is a false dependency, - // do not add the edge, but rememeber the rejected node. + // do not add the edge, but remember the rejected node. if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier); Dep.setLatency(TrueMemOrderLatency); @@ -685,7 +736,7 @@ static inline void addChainDependency(AliasAnalysis *AA, } } -/// Create an SUnit for each real instruction, numbered in top-down toplological +/// Create an SUnit for each real instruction, numbered in top-down topological /// order. The instruction order A < B, implies that no edge exists from B to A. /// /// Map each real instruction to its SUnit. @@ -743,17 +794,44 @@ void ScheduleDAGInstrs::initSUnits() { } } +void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) { + const MachineInstr *MI = SU->getInstr(); + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg()) + continue; + if (!MO.readsReg()) + continue; + if (TrackLaneMasks && !MO.isUse()) + continue; + + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + // Record this local VReg use. 
+ VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); + for (; UI != VRegUses.end(); ++UI) { + if (UI->SU == SU) + break; + } + if (UI == VRegUses.end()) + VRegUses.insert(VReg2SUnit(Reg, 0, SU)); + } +} + /// If RegPressure is non-null, compute register pressure as a side effect. The /// DAG builder is an efficient place to do it because it already visits /// operands. void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker, - PressureDiffs *PDiffs) { + PressureDiffs *PDiffs, + bool TrackLaneMasks) { const TargetSubtargetInfo &ST = MF.getSubtarget(); bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI : ST.useAA(); AliasAnalysis *AAForDep = UseAA ? AA : nullptr; + this->TrackLaneMasks = TrackLaneMasks; MISUnitMap.clear(); ScheduleDAG::clearDAG(); @@ -766,7 +844,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // We build scheduling units by walking a block's instruction list from bottom // to top. - // Remember where a generic side-effecting instruction is as we procede. + // Remember where a generic side-effecting instruction is as we proceed. SUnit *BarrierChain = nullptr, *AliasChain = nullptr; // Memory references to specific known memory locations are tracked @@ -787,10 +865,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Defs.setUniverse(TRI->getNumRegs()); Uses.setUniverse(TRI->getNumRegs()); - assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); + assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs"); + assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses"); + unsigned NumVirtRegs = MRI.getNumVirtRegs(); + CurrentVRegDefs.setUniverse(NumVirtRegs); + CurrentVRegUses.setUniverse(NumVirtRegs); + VRegUses.clear(); - VRegDefs.setUniverse(MRI.getNumVirtRegs()); - VRegUses.setUniverse(MRI.getNumVirtRegs()); + VRegUses.setUniverse(NumVirtRegs); // Model data dependencies between instructions being scheduled and the // ExitSU. @@ -818,6 +900,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, RPTracker->recede(/*LiveUses=*/nullptr, PDiff); assert(RPTracker->getPos() == std::prev(MII) && "RPTracker can't find MI"); + collectVRegUses(SU); } assert( @@ -835,7 +918,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (TRI->isPhysicalRegister(Reg)) addPhysRegDeps(SU, j); else { - assert(!IsPostRA && "Virtual register encountered!"); if (MO.isDef()) { HasVRegDef = true; addVRegDefDeps(SU, j); @@ -890,7 +972,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, BarrierChain = SU; // This is a barrier event that acts as a pivotal node in the DAG, // so it is safe to clear list of exposed nodes. 
- adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); RejectMemNodes.clear(); NonAliasMemDefs.clear(); @@ -903,27 +985,27 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, unsigned ChainLatency = 0; if (AliasChain->getInstr()->mayLoad()) ChainLatency = TrueMemOrderLatency; - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, RejectMemNodes, ChainLatency); } AliasChain = SU; for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes); } for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes, TrueMemOrderLatency); } - adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); PendingLoads.clear(); AliasMemDefs.clear(); @@ -937,7 +1019,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, BarrierChain->addPred(SDep(SU, SDep::Barrier)); UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout()); + getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout()); if (Objs.empty()) { // Treat all other stores conservatively. @@ -961,7 +1043,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes, 0, true); // If we're not using AA, then we only need one store per object. @@ -986,7 +1068,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, J->second[i], RejectMemNodes, TrueMemOrderLatency, true); J->second.clear(); @@ -996,15 +1078,15 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Add dependencies from all the PendingLoads, i.e. loads // with no underlying object. for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); // Add dependence on alias chain, if needed. 
if (AliasChain) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, RejectMemNodes); } - adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); } else if (MI->mayLoad()) { bool MayAlias = true; @@ -1012,7 +1094,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Invariant load, no chain dependencies needed! } else { UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout()); + getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout()); if (Objs.empty()) { // A load with no underlying object. Depend on all @@ -1020,7 +1102,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes); PendingLoads.push_back(SU); @@ -1044,7 +1126,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes, 0, true); if (ThisMayAlias) AliasMemUses[V].push_back(SU); @@ -1052,11 +1134,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, NonAliasMemUses[V].push_back(SU); } if (MayAlias) - adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, + adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, /*Latency=*/0); // Add dependencies on alias and barrier chains, if needed. if (MayAlias && AliasChain) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, RejectMemNodes); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Barrier)); @@ -1068,7 +1150,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Defs.clear(); Uses.clear(); - VRegDefs.clear(); + CurrentVRegDefs.clear(); + CurrentVRegUses.clear(); PendingLoads.clear(); } @@ -1080,11 +1163,9 @@ void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { // Examine the live-in regs of all successors. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) { - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; + for (const auto &LI : (*SI)->liveins()) { // Repeat, for reg and all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) LiveRegs.set(*SubRegs); } @@ -1103,7 +1184,7 @@ static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg, // Once we set a kill flag on an instruction, we bail out, as otherwise we // might set it on too many operands. We will clear as many flags as we // can though. 
- MachineBasicBlock::instr_iterator Begin = MI; + MachineBasicBlock::instr_iterator Begin = MI->getIterator(); MachineBasicBlock::instr_iterator End = getBundleEnd(MI); while (Begin != End) { for (MachineOperand &MO : (--End)->operands()) { @@ -1237,7 +1318,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { toggleKillFlag(MI, MO); DEBUG(MI->dump()); DEBUG(if (MI->getOpcode() == TargetOpcode::BUNDLE) { - MachineBasicBlock::instr_iterator Begin = MI; + MachineBasicBlock::instr_iterator Begin = MI->getIterator(); MachineBasicBlock::instr_iterator End = getBundleEnd(MI); while (++Begin != End) DEBUG(Begin->dump()); diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp index b2e4617720ff..1150d26e559b 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -43,9 +43,12 @@ namespace llvm { return (Node->NumPreds > 10 || Node->NumSuccs > 10); } - static bool hasNodeAddressLabel(const SUnit *Node, - const ScheduleDAG *Graph) { - return true; + static std::string getNodeIdentifierLabel(const SUnit *Node, + const ScheduleDAG *Graph) { + std::string R; + raw_string_ostream OS(R); + OS << static_cast<const void *>(Node); + return R; } /// If you want to override the dot attributes printed for a particular diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3b29306bb54a..0872d7a9a228 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -156,13 +156,16 @@ namespace { void deleteAndRecombine(SDNode *N); bool recursivelyDeleteUnusedNodes(SDNode *N); + /// Replaces all uses of the results of one DAG node with new values. SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo = true); + /// Replaces all uses of the results of one DAG node with new values. SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { return CombineTo(N, &Res, 1, AddTo); } + /// Replaces all uses of the results of one DAG node with new values. 
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true) { SDValue To[] = { Res0, Res1 }; @@ -233,18 +236,17 @@ namespace { SDValue visitADDE(SDNode *N); SDValue visitSUBE(SDNode *N); SDValue visitMUL(SDNode *N); + SDValue useDivRem(SDNode *N); SDValue visitSDIV(SDNode *N); SDValue visitUDIV(SDNode *N); - SDValue visitSREM(SDNode *N); - SDValue visitUREM(SDNode *N); + SDValue visitREM(SDNode *N); SDValue visitMULHU(SDNode *N); SDValue visitMULHS(SDNode *N); SDValue visitSMUL_LOHI(SDNode *N); SDValue visitUMUL_LOHI(SDNode *N); SDValue visitSMULO(SDNode *N); SDValue visitUMULO(SDNode *N); - SDValue visitSDIVREM(SDNode *N); - SDValue visitUDIVREM(SDNode *N); + SDValue visitIMINMAX(SDNode *N); SDValue visitAND(SDNode *N); SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitOR(SDNode *N); @@ -265,6 +267,7 @@ namespace { SDValue visitVSELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); SDValue visitSETCC(SDNode *N); + SDValue visitSETCCE(SDNode *N); SDValue visitSIGN_EXTEND(SDNode *N); SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); @@ -298,6 +301,10 @@ namespace { SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); + + SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain); + SDValue replaceStoreOfFPConstant(StoreSDNode *ST); + SDValue visitSTORE(SDNode *N); SDValue visitINSERT_VECTOR_ELT(SDNode *N); SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); @@ -312,9 +319,11 @@ namespace { SDValue visitMGATHER(SDNode *N); SDValue visitMSCATTER(SDNode *N); SDValue visitFP_TO_FP16(SDNode *N); + SDValue visitFP16_TO_FP(SDNode *N); SDValue visitFADDForFMACombine(SDNode *N); SDValue visitFSUBForFMACombine(SDNode *N); + SDValue visitFMULForFMACombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); @@ -338,14 +347,17 @@ namespace { unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); SDValue CombineExtLoad(SDNode *N); + SDValue combineRepeatedFPDivisors(SDNode *N); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); - SDValue BuildReciprocalEstimate(SDValue Op); - SDValue BuildRsqrtEstimate(SDValue Op); - SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations); - SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations); + SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags); + SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); + SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, + SDNodeFlags *Flags); + SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, + SDNodeFlags *Flags); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -374,6 +386,10 @@ namespace { /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); + /// Do FindBetterChain for a store and any possibly adjacent stores on + /// consecutive chains. + bool findBetterNeighborChains(StoreSDNode *St); + /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. 
struct MemOpLink { @@ -388,19 +404,37 @@ namespace { unsigned SequenceNum; }; + /// This is a helper function for visitMUL to check the profitability + /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). + /// MulNode is the original multiply, AddNode is (add x, c1), + /// and ConstNode is c2. + bool isMulAddWithConstProfitable(SDNode *MulNode, + SDValue &AddNode, + SDValue &ConstNode); + /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a /// constant build_vector of the stored constant values in Stores. SDValue getMergedConstantVectorStore(SelectionDAG &DAG, SDLoc SL, ArrayRef<MemOpLink> Stores, + SmallVectorImpl<SDValue> &Chains, EVT Ty) const; + /// This is a helper function for visitAND and visitZERO_EXTEND. Returns + /// true if the (and (load x) c) pattern matches an extload. ExtVT returns + /// the type of the loaded value to be extended. LoadedVT returns the type + /// of the original loaded value. NarrowLoad returns whether the load would + /// need to be narrowed in order to match. + bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, + EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, + bool &NarrowLoad); + /// This is a helper function for MergeConsecutiveStores. When the source /// elements of the consecutive stores are all constants or all extracted /// vector elements, try to merge them into one larger store. /// \return True if a merged store was created. bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, - EVT MemVT, unsigned NumElem, + EVT MemVT, unsigned NumStores, bool IsConstantSrc, bool UseVector); /// This is a helper function for MergeConsecutiveStores. @@ -409,7 +443,7 @@ namespace { void getStoreMergeAndAliasCandidates( StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes, SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes); - + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return True if some memory operations were changed. @@ -427,9 +461,7 @@ namespace { DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { - auto *F = DAG.getMachineFunction().getFunction(); - ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) || - F->hasFnAttribute(Attribute::MinSize); + ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); } /// Runs the dag combiner on all nodes in the work list @@ -606,6 +638,9 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, assert(Op.hasOneUse() && "Unknown reuse!"); assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); + + const SDNodeFlags *Flags = Op.getNode()->getFlags(); + switch (Op.getOpcode()) { default: llvm_unreachable("Unknown code"); case ISD::ConstantFP: { @@ -623,12 +658,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), - Op.getOperand(1)); + Op.getOperand(1), Flags); // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, Depth+1), - Op.getOperand(0)); + Op.getOperand(0), Flags); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. 
assert(Options.UnsafeFPMath); @@ -640,7 +675,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, // fold (fneg (fsub A, B)) -> (fsub B, A) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - Op.getOperand(1), Op.getOperand(0)); + Op.getOperand(1), Op.getOperand(0), Flags); case ISD::FMUL: case ISD::FDIV: @@ -652,13 +687,13 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), - Op.getOperand(1)); + Op.getOperand(1), Flags); // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), GetNegatedExpression(Op.getOperand(1), DAG, - LegalOperations, Depth+1)); + LegalOperations, Depth+1), Flags); case ISD::FP_EXTEND: case ISD::FSIN: @@ -1216,9 +1251,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { LegalTypes = Level >= AfterLegalizeTypes; // Add all the dag nodes to the worklist. - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) - AddToWorklist(I); + for (SDNode &Node : DAG.allnodes()) + AddToWorklist(&Node); // Create a dummy node (which is not added to allnodes), that adds a reference // to the root node, preventing it from being deleted, and tracking any @@ -1333,16 +1367,18 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); - case ISD::SREM: return visitSREM(N); - case ISD::UREM: return visitUREM(N); + case ISD::SREM: + case ISD::UREM: return visitREM(N); case ISD::MULHU: return visitMULHU(N); case ISD::MULHS: return visitMULHS(N); case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); case ISD::SMULO: return visitSMULO(N); case ISD::UMULO: return visitUMULO(N); - case ISD::SDIVREM: return visitSDIVREM(N); - case ISD::UDIVREM: return visitUDIVREM(N); + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: return visitIMINMAX(N); case ISD::AND: return visitAND(N); case ISD::OR: return visitOR(N); case ISD::XOR: return visitXOR(N); @@ -1361,6 +1397,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::VSELECT: return visitVSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); case ISD::SETCC: return visitSETCC(N); + case ISD::SETCCE: return visitSETCCE(N); case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); @@ -1408,6 +1445,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::MSCATTER: return visitMSCATTER(N); case ISD::MSTORE: return visitMSTORE(N); case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); + case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); } return SDValue(); } @@ -1470,13 +1508,8 @@ SDValue DAGCombiner::combine(SDNode *N) { // Constant operands are canonicalized to RHS. 
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { SDValue Ops[] = {N1, N0}; - SDNode *CSENode; - if (const auto *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) { - CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, - &BinNode->Flags); - } else { - CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops); - } + SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, + N->getFlags()); if (CSENode) return SDValue(CSENode, 0); } @@ -1595,26 +1628,6 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } -static bool isNullConstant(SDValue V) { - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isNullValue(); -} - -static bool isNullFPConstant(SDValue V) { - ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); - return Const != nullptr && Const->isZero() && !Const->isNegative(); -} - -static bool isAllOnesConstant(SDValue V) { - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isAllOnesValue(); -} - -static bool isOneConstant(SDValue V) { - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isOne(); -} - /// If \p N is a ContantSDNode with isOpaque() == false return it casted to a /// ContantSDNode pointer else nullptr. static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { @@ -1721,22 +1734,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return SDValue(N, 0); // fold (a+b) -> (a|b) iff a and b share no bits. - if (VT.isInteger() && !VT.isVector()) { - APInt LHSZero, LHSOne; - APInt RHSZero, RHSOne; - DAG.computeKnownBits(N0, LHSZero, LHSOne); - - if (LHSZero.getBoolValue()) { - DAG.computeKnownBits(N1, RHSZero, RHSOne); - - // If all possibly-set bits on the LHS are clear on the RHS, return an OR. - // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){ - if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); - } - } - } + if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && + VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1)) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && @@ -1971,31 +1971,26 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + SDLoc DL(N); // If the flag result is dead, turn this into an SUB. if (!N->hasAnyUseOfValue(1)) - return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1), - DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), - MVT::Glue)); + return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // fold (subc x, x) -> 0 + no borrow - if (N0 == N1) { - SDLoc DL(N); + if (N0 == N1) return CombineTo(N, DAG.getConstant(0, DL, VT), - DAG.getNode(ISD::CARRY_FALSE, DL, - MVT::Glue)); - } + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // fold (subc x, 0) -> x + no borrow if (isNullConstant(N1)) - return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), - MVT::Glue)); + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) + no borrow if (isAllOnesConstant(N0)) - return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0), - DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), - MVT::Glue)); + return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); return SDValue(); } @@ -2130,14 +2125,15 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) - if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && - (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || - isa<ConstantSDNode>(N0.getOperand(1)))) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, - DAG.getNode(ISD::MUL, SDLoc(N0), VT, - N0.getOperand(0), N1), - DAG.getNode(ISD::MUL, SDLoc(N1), VT, - N0.getOperand(1), N1)); + if (isConstantIntBuildVectorOrConstantInt(N1) && + N0.getOpcode() == ISD::ADD && + isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && + isMulAddWithConstProfitable(N, N0, N1)) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, + DAG.getNode(ISD::MUL, SDLoc(N0), VT, + N0.getOperand(0), N1), + DAG.getNode(ISD::MUL, SDLoc(N1), VT, + N0.getOperand(1), N1)); // reassociate mul if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1)) @@ -2146,6 +2142,88 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { return SDValue(); } +/// Return true if divmod libcall is available. +static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, + const TargetLowering &TLI) { + RTLIB::Libcall LC; + switch (Node->getSimpleValueType(0).SimpleTy) { + default: return false; // No libcall for vector types. + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; + } + + return TLI.getLibcallName(LC) != nullptr; +} + +/// Issue divrem if both quotient and remainder are needed. +SDValue DAGCombiner::useDivRem(SDNode *Node) { + if (Node->use_empty()) + return SDValue(); // This is a dead node, leave it alone. + + EVT VT = Node->getValueType(0); + if (!TLI.isTypeLegal(VT)) + return SDValue(); + + unsigned Opcode = Node->getOpcode(); + bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); + + unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; + // If DIVREM is going to get expanded into a libcall, + // but there is no libcall available, then don't combine. + if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) && + !isDivRemLibcallAvailable(Node, isSigned, TLI)) + return SDValue(); + + // If div is legal, it's better to do the normal expansion + unsigned OtherOpcode = 0; + if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) { + OtherOpcode = isSigned ? ISD::SREM : ISD::UREM; + if (TLI.isOperationLegalOrCustom(Opcode, VT)) + return SDValue(); + } else { + OtherOpcode = isSigned ? 
ISD::SDIV : ISD::UDIV; + if (TLI.isOperationLegalOrCustom(OtherOpcode, VT)) + return SDValue(); + } + + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue combined; + for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User == Node || User->use_empty()) + continue; + // Convert the other matching node(s), too; + // otherwise, the DIVREM may get target-legalized into something + // target-specific that we won't be able to recognize. + unsigned UserOpc = User->getOpcode(); + if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) && + User->getOperand(0) == Op0 && + User->getOperand(1) == Op1) { + if (!combined) { + if (UserOpc == OtherOpcode) { + SDVTList VTs = DAG.getVTList(VT, VT); + combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1); + } else if (UserOpc == DivRemOpc) { + combined = SDValue(User, 0); + } else { + assert(UserOpc == Opcode); + continue; + } + } + if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV) + CombineTo(User, combined); + else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM) + CombineTo(User, combined.getValue(1)); + } + } + return combined; +} + SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2156,26 +2234,26 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; + SDLoc DL(N); + // fold (sdiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) - return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C); // fold (sdiv X, 1) -> X if (N1C && N1C->isOne()) return N0; // fold (sdiv X, -1) -> 0-X - if (N1C && N1C->isAllOnesValue()) { - SDLoc DL(N); + if (N1C && N1C->isAllOnesValue()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); - } + // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 if (!VT.isVector()) { if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(), - N0, N1); + return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); } // fold (sdiv X, pow2) -> simple ops after legalize @@ -2186,18 +2264,11 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() && (N1C->getAPIntValue().isPowerOf2() || (-N1C->getAPIntValue()).isPowerOf2())) { - // If dividing by powers of two is cheap, then don't perform the following - // fold. - if (TLI.isPow2SDivCheap()) - return SDValue(); - // Target-specific implementation of sdiv x, pow2. - SDValue Res = BuildSDIVPow2(N); - if (Res.getNode()) + if (SDValue Res = BuildSDIVPow2(N)) return Res; unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); - SDLoc DL(N); // Splat the sign bit into the register SDValue SGN = @@ -2228,15 +2299,23 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { } // If integer divide is expensive and we satisfy the requirements, emit an - // alternate sequence. - if (N1C && !TLI.isIntDivCheap()) { - SDValue Op = BuildSDIV(N); - if (Op.getNode()) return Op; - } + // alternate sequence. Targets may check function attributes for size/speed + // trade-offs. 
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue Op = BuildSDIV(N)) + return Op; + + // sdiv, srem -> sdivrem + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. + // Otherwise, we break the simplification logic in visitREM(). + if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue DivRem = useDivRem(N)) + return DivRem; // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2254,26 +2333,26 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; + SDLoc DL(N); + // fold (udiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C) - if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT, + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, N0C, N1C)) return Folded; // fold (udiv x, (1 << c)) -> x >>u c - if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) { - SDLoc DL(N); + if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(N1C->getAPIntValue().logBase2(), DL, getShiftAmountTy(N0.getValueType()))); - } + // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { EVT ADDVT = N1.getOperand(1).getValueType(); - SDLoc DL(N); SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() @@ -2284,15 +2363,23 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { } } } + // fold (udiv x, c) -> alternate - if (N1C && !TLI.isIntDivCheap()) { - SDValue Op = BuildUDIV(N); - if (Op.getNode()) return Op; - } + AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue Op = BuildUDIV(N)) + return Op; + + // sdiv, srem -> sdivrem + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. + // Otherwise, we break the simplification logic in visitREM(). 
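The useDivRem combine introduced above only fires when both the quotient and the remainder of the same operands are live. A minimal standalone sketch, not LLVM code, of what that buys: one real division feeds both users, with the remainder rebuilt from the quotient via the X - (X/C)*C identity that visitREM also relies on further down. divRem here is a hypothetical stand-in for a target divide instruction or one of the RTLIB::SDIVREM_* / UDIVREM_* libcalls checked above:

// Standalone check that one division can satisfy both a DIV user and a REM
// user of the same operands.
#include <cassert>
#include <cstdint>
#include <cstdio>

// One combined "divrem": a single real division whose remainder is recovered
// with a multiply and a subtract.
static void divRem(int64_t a, int64_t b, int64_t &q, int64_t &r) {
  q = a / b;
  r = a - q * b;
}

int main() {
  int64_t a = 1234, b = 37;
  int64_t q, r;
  divRem(a, b, q, r);
  assert(q == a / b && r == a % b);  // both users are satisfied by one divide
  std::printf("quot=%lld rem=%lld\n", (long long)q, (long long)r);
  return 0;
}

This also motivates the isIntDivCheap guard: when a target would rather expand the division into shifts and multiplies anyway, forcing a DIVREM node would block that cheaper expansion.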
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue DivRem = useDivRem(N)) + return DivRem; // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2300,102 +2387,83 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitSREM(SDNode *N) { +// handles ISD::SREM and ISD::UREM +SDValue DAGCombiner::visitREM(SDNode *N) { + unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); + bool isSigned = (Opcode == ISD::SREM); + SDLoc DL(N); - // fold (srem c1, c2) -> c1%c2 + // fold (rem c1, c2) -> c1%c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C) - if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT, - N0C, N1C)) + if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C)) return Folded; - // If we know the sign bits of both operands are zero, strength reduce to a - // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 - if (!VT.isVector()) { - if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1); - } - // If X/C can be simplified by the division-by-constant logic, lower - // X%C to the equivalent of X-X/C*C. - if (N1C && !N1C->isNullValue()) { - SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1); - AddToWorklist(Div.getNode()); - SDValue OptimizedDiv = combine(Div.getNode()); - if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, - OptimizedDiv, N1); - SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); - AddToWorklist(Mul.getNode()); - return Sub; + if (isSigned) { + // If we know the sign bits of both operands are zero, strength reduce to a + // urem instead. 
Handles (X & 0x0FFFFFFF) %s 16 -> X&15 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UREM, DL, VT, N0, N1); } - } - - // undef % X -> 0 - if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, SDLoc(N), VT); - // X % undef -> undef - if (N1.getOpcode() == ISD::UNDEF) - return N1; - - return SDValue(); -} - -SDValue DAGCombiner::visitUREM(SDNode *N) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - EVT VT = N->getValueType(0); - - // fold (urem c1, c2) -> c1%c2 - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C) - if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT, - N0C, N1C)) - return Folded; - // fold (urem x, pow2) -> (and x, pow2-1) - if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && - N1C->getAPIntValue().isPowerOf2()) { - SDLoc DL(N); - return DAG.getNode(ISD::AND, DL, VT, N0, - DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); - } - // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) - if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { - if (SHC->getAPIntValue().isPowerOf2()) { - SDLoc DL(N); - SDValue Add = - DAG.getNode(ISD::ADD, DL, VT, N1, + } else { + // fold (urem x, pow2) -> (and x, pow2-1) + if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && + N1C->getAPIntValue().isPowerOf2()) { + return DAG.getNode(ISD::AND, DL, VT, N0, + DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); + } + // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + SDValue Add = + DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT)); - AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::AND, DL, VT, N0, Add); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); + } } } } + AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. - if (N1C && !N1C->isNullValue()) { - SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1); + // To avoid mangling nodes, this simplification requires that the combine() + // call for the speculative DIV must not cause a DIVREM conversion. We guard + // against this by skipping the simplification if isIntDivCheap(). When + // div is not cheap, combine will not return a DIVREM. Regardless, + // checking cheapness here makes sense since the simplification results in + // fatter code. + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) { + unsigned DivOpcode = isSigned ? 
ISD::SDIV : ISD::UDIV; + SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1); AddToWorklist(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, - OptimizedDiv, N1); - SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); + assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) && + (OptimizedDiv.getOpcode() != ISD::SDIVREM)); + SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1); + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); AddToWorklist(Mul.getNode()); return Sub; } } + // sdiv, srem -> sdivrem + if (SDValue DivRem = useDivRem(N)) + return DivRem.getValue(1); + // undef % X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // X % undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2532,8 +2600,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, } SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); - if (Res.getNode()) return Res; + if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS)) + return Res; EVT VT = N->getValueType(0); SDLoc DL(N); @@ -2563,8 +2631,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { } SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); - if (Res.getNode()) return Res; + if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU)) + return Res; EVT VT = N->getValueType(0); SDLoc DL(N); @@ -2613,16 +2681,26 @@ SDValue DAGCombiner::visitUMULO(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitSDIVREM(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); - if (Res.getNode()) return Res; +SDValue DAGCombiner::visitIMINMAX(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; - return SDValue(); -} + // fold (add c1, c2) -> c1+c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); + if (N0C && N1C) + return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); -SDValue DAGCombiner::visitUDIVREM(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); - if (Res.getNode()) return Res; + // canonicalize constant to RHS + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); return SDValue(); } @@ -2848,10 +2926,13 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, if (Result != ISD::SETCC_INVALID && (!LegalOperations || (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getSimpleValueType()))))) - return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), - LL, LR, Result); + TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) { + EVT CCVT = getSetCCResultType(LL.getValueType()); + if (N0.getValueType() == CCVT || + (!LegalOperations && N0.getValueType() == MVT::i1)) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } } } @@ -2887,6 +2968,46 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, return SDValue(); } +bool 
DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, + EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, + bool &NarrowLoad) { + uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits(); + + if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue())) + return false; + + ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + LoadedVT = LoadN->getMemoryVT(); + + if (ExtVT == LoadedVT && + (!LegalOperations || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) { + // ZEXTLOAD will match without needing to change the size of the value being + // loaded. + NarrowLoad = false; + return true; + } + + // Do not change the width of a volatile load. + if (LoadN->isVolatile()) + return false; + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound()) + return false; + + if (LegalOperations && + !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT)) + return false; + + if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT)) + return false; + + NarrowLoad = true; + return true; +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -3079,16 +3200,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) { : cast<LoadSDNode>(N0); if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { - uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); - if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ - EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - EVT LoadedVT = LN0->getMemoryVT(); - EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; - - if (ExtVT == LoadedVT && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, - ExtVT))) { - + auto NarrowLoad = false; + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; + EVT ExtVT, LoadedVT; + if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT, + NarrowLoad)) { + if (!NarrowLoad) { SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), LN0->getBasePtr(), ExtVT, @@ -3096,14 +3213,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { AddToWorklist(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - - // Do not change the width of a volatile load. - // Do not generate loads of non-round integer types since these can - // be expensive (and would be wrong if the type is not byte sized). - if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, - ExtVT))) { + } else { EVT PtrType = LN0->getOperand(1).getValueType(); unsigned Alignment = LN0->getAlignment(); @@ -3142,10 +3252,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return Combined; // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) // fold (and (sra)) -> (and (srl)) when possible. 
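The isAndLoadExtLoad helper added above factors the existing visitAND logic into a reusable predicate. A minimal standalone sketch, not LLVM code, of the decision it encodes, with activeBits and isLowMask as hypothetical stand-ins for APInt::getActiveBits and APIntOps::isMask, and byte multiples standing in for ExtVT.isRound; the volatile-load and shouldReduceLoadWidth checks are omitted:

// (and (load x), c) can only become a zero-extending load when c is a mask of
// the low bits; the load itself is narrowed when that mask covers fewer
// (byte-sized, "round") bits than were originally loaded.
#include <cstdint>
#include <cstdio>

// Bits needed to represent c; plays the role of APInt::getActiveBits().
static unsigned activeBits(uint64_t c) {
  unsigned n = 0;
  while (c) { ++n; c >>= 1; }
  return n;
}

// True when c == 2^n - 1 for some n > 0, i.e. a mask of the low n bits.
static bool isLowMask(uint64_t c) {
  return c != 0 && (c & (c + 1)) == 0;
}

int main() {
  const unsigned LoadedBits = 32;                 // width of the original load
  const uint64_t Masks[] = {0xFF, 0xFFFF, 0xF0, 0x1FF, 0xFFFFFFFF};
  for (uint64_t C : Masks) {
    if (!isLowMask(C)) {
      std::printf("0x%llx: not a low mask, keep the AND\n",
                  (unsigned long long)C);
      continue;
    }
    unsigned ExtBits = activeBits(C);
    if (ExtBits == LoadedBits)
      std::printf("0x%llx: fold to a zextload, same width\n",
                  (unsigned long long)C);
    else if (ExtBits % 8 == 0)                    // stand-in for ExtVT.isRound()
      std::printf("0x%llx: fold to a narrowed zextload of i%u\n",
                  (unsigned long long)C, ExtBits);
    else
      std::printf("0x%llx: reject, i%u is not a round type\n",
                  (unsigned long long)C, ExtBits);
  }
  return 0;
}

So a mask of 0xFF narrows an i32 load to a zextload of i8, 0xF0 is rejected because the AND still has work to do, and 0x1FF is rejected because an i9 load is not byte sized.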
@@ -3507,10 +3616,13 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { if (Result != ISD::SETCC_INVALID && (!LegalOperations || (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getValueType()))))) - return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), - LL, LR, Result); + TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) { + EVT CCVT = getSetCCResultType(LL.getValueType()); + if (N0.getValueType() == CCVT || + (!LegalOperations && N0.getValueType() == MVT::i1)) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } } } @@ -3665,11 +3777,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return Combined; // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) - SDValue BSwap = MatchBSwapHWord(N, N0, N1); - if (BSwap.getNode()) + if (SDValue BSwap = MatchBSwapHWord(N, N0, N1)) return BSwap; - BSwap = MatchBSwapHWordLow(N, N0, N1); - if (BSwap.getNode()) + if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1)) return BSwap; // reassociate or @@ -3690,10 +3800,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; // See if this is some rotate idiom. if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) @@ -3710,7 +3819,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { /// Match "(X shl/srl V1) & V2" where V2 may not be present. static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { if (Op.getOpcode() == ISD::AND) { - if (isa<ConstantSDNode>(Op.getOperand(1))) { + if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { Mask = Op.getOperand(1); Op = Op.getOperand(0); } else { @@ -3727,105 +3836,106 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { } // Return true if we can prove that, whenever Neg and Pos are both in the -// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that +// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that // for two opposing shifts shift1 and shift2 and a value X with OpBits bits: // // (or (shift1 X, Neg), (shift2 X, Pos)) // // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate -// in direction shift1 by Neg. The range [0, OpSize) means that we only need +// in direction shift1 by Neg. The range [0, EltSize) means that we only need // to consider shift amounts with defined behavior. -static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) { - // If OpSize is a power of 2 then: +static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { + // If EltSize is a power of 2 then: // - // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1) - // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize). + // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) + // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize). // - // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check + // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check // for the stronger condition: // - // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A] + // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A] // - // for all Neg and Pos. 
Since Neg & (OpSize - 1) == Neg' & (OpSize - 1) + // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1) // we can just replace Neg with Neg' for the rest of the function. // // In other cases we check for the even stronger condition: // - // Neg == OpSize - Pos [B] + // Neg == EltSize - Pos [B] // // for all Neg and Pos. Note that the (or ...) then invokes undefined - // behavior if Pos == 0 (and consequently Neg == OpSize). + // behavior if Pos == 0 (and consequently Neg == EltSize). // - // We could actually use [A] whenever OpSize is a power of 2, but the + // We could actually use [A] whenever EltSize is a power of 2, but the // only extra cases that it would match are those uninteresting ones // where Neg and Pos are never in range at the same time. E.g. for - // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) + // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) // as well as (sub 32, Pos), but: // // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) // // always invokes undefined behavior for 32-bit X. // - // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise. + // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise. unsigned MaskLoBits = 0; - if (Neg.getOpcode() == ISD::AND && - isPowerOf2_64(OpSize) && - Neg.getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) { - Neg = Neg.getOperand(0); - MaskLoBits = Log2_64(OpSize); + if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { + if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) { + if (NegC->getAPIntValue() == EltSize - 1) { + Neg = Neg.getOperand(0); + MaskLoBits = Log2_64(EltSize); + } + } } // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. if (Neg.getOpcode() != ISD::SUB) - return 0; - ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0)); + return false; + ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0)); if (!NegC) - return 0; + return false; SDValue NegOp1 = Neg.getOperand(1); - // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with + // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with // Pos'. The truncation is redundant for the purpose of the equality. - if (MaskLoBits && - Pos.getOpcode() == ISD::AND && - Pos.getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1) - Pos = Pos.getOperand(0); + if (MaskLoBits && Pos.getOpcode() == ISD::AND) + if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) + if (PosC->getAPIntValue() == EltSize - 1) + Pos = Pos.getOperand(0); // The condition we need is now: // - // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask + // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask // // If NegOp1 == Pos then we need: // - // OpSize & Mask == NegC & Mask + // EltSize & Mask == NegC & Mask // // (because "x & Mask" is a truncation and distributes through subtraction). APInt Width; if (Pos == NegOp1) Width = NegC->getAPIntValue(); + // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. 
// Then the condition we want to prove becomes: // - // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask + // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask // // which, again because "x & Mask" is a truncation, becomes: // - // NegC & Mask == (OpSize - PosC) & Mask - // OpSize & Mask == (NegC + PosC) & Mask - else if (Pos.getOpcode() == ISD::ADD && - Pos.getOperand(0) == NegOp1 && - Pos.getOperand(1).getOpcode() == ISD::Constant) - Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() + - NegC->getAPIntValue()); - else + // NegC & Mask == (EltSize - PosC) & Mask + // EltSize & Mask == (NegC + PosC) & Mask + else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) { + if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) + Width = PosC->getAPIntValue() + NegC->getAPIntValue(); + else + return false; + } else return false; - // Now we just need to check that OpSize & Mask == Width & Mask. + // Now we just need to check that EltSize & Mask == Width & Mask. if (MaskLoBits) - // Opsize & Mask is 0 since Mask is Opsize - 1. + // EltSize & Mask is 0 since Mask is EltSize - 1. return Width.getLoBits(MaskLoBits) == 0; - return Width == OpSize; + return Width == EltSize; } // A subroutine of MatchRotate used once we have found an OR of two opposite @@ -3845,7 +3955,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, // (srl x, (*ext y))) -> // (rotr x, y) or (rotl x, (sub 32, y)) EVT VT = Shifted.getValueType(); - if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) { + if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, HasPos ? Pos : Neg).getNode(); @@ -3888,10 +3998,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { if (RHSShift.getOpcode() == ISD::SHL) { std::swap(LHS, RHS); std::swap(LHSShift, RHSShift); - std::swap(LHSMask , RHSMask ); + std::swap(LHSMask, RHSMask); } - unsigned OpSizeInBits = VT.getSizeInBits(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); SDValue LHSShiftArg = LHSShift.getOperand(0); SDValue LHSShiftAmt = LHSShift.getOperand(1); SDValue RHSShiftArg = RHSShift.getOperand(0); @@ -3899,11 +4009,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) - if (LHSShiftAmt.getOpcode() == ISD::Constant && - RHSShiftAmt.getOpcode() == ISD::Constant) { - uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue(); - uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue(); - if ((LShVal + RShVal) != OpSizeInBits) + if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) { + uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue(); + uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue(); + if ((LShVal + RShVal) != EltSizeInBits) return nullptr; SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, @@ -3911,18 +4020,23 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // If there is an AND of either shifted operand, apply it to the result. 
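The rotate matching in this hunk leans on a plain arithmetic identity. As a standalone sketch in ordinary C++ (illustration only, not combiner code; the helper names rotlViaShifts and rotlMasked are made up here, and a 32-bit element is assumed for concreteness), the shl/srl pair that MatchRotate recognizes behaves like a rotate exactly when the two shift amounts satisfy condition [B], or condition [A] once both amounts are masked with EltSize - 1:

#include <cassert>
#include <cstdint>

// For 0 < N < 32, (x << N) | (x >> (32 - N)) is a left rotate by N.
// Here Pos = N and Neg = 32 - N, i.e. Neg == EltSize - Pos: condition [B].
// (N == 0 would shift by 32 and invoke undefined behavior, matching the
// caveat in the matchRotateSub comment.)
static uint32_t rotlViaShifts(uint32_t X, unsigned N) {
  return (X << N) | (X >> (32 - N));
}

// Variant with both amounts masked by EltSize - 1, matching condition [A]:
// Neg & 31 == (32 - Pos) & 31 holds for every N, including N == 0.
static uint32_t rotlMasked(uint32_t X, unsigned N) {
  return (X << (N & 31)) | (X >> ((32 - N) & 31));
}

int main() {
  assert(rotlViaShifts(0x80000001u, 1) == 0x00000003u);
  assert(rotlMasked(0x80000001u, 33) == 0x00000003u); // masked form tolerates N >= 32
  assert(rotlMasked(0x80000001u, 0) == 0x80000001u);  // rotate by 0 is the identity
  return 0;
}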
if (LHSMask.getNode() || RHSMask.getNode()) { - APInt Mask = APInt::getAllOnesValue(OpSizeInBits); + APInt AllBits = APInt::getAllOnesValue(EltSizeInBits); + SDValue Mask = DAG.getConstant(AllBits, DL, VT); if (LHSMask.getNode()) { - APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal); - Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits; + APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal); + Mask = DAG.getNode(ISD::AND, DL, VT, Mask, + DAG.getNode(ISD::OR, DL, VT, LHSMask, + DAG.getConstant(RHSBits, DL, VT))); } if (RHSMask.getNode()) { - APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal); - Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits; + APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal); + Mask = DAG.getNode(ISD::AND, DL, VT, Mask, + DAG.getNode(ISD::OR, DL, VT, RHSMask, + DAG.getConstant(LHSBits, DL, VT))); } - Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT)); + Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); } return Rot.getNode(); @@ -4112,10 +4226,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; // Simplify the expression using non-local knowledge. if (!VT.isVector() && @@ -4434,12 +4547,19 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); } - if (N1C && !N1C->isOpaque()) { - SDValue NewSHL = visitShiftByConstant(N, N1C); - if (NewSHL.getNode()) - return NewSHL; + // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) + if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + if (SDValue Folded = + DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded); + } } + if (N1C && !N1C->isOpaque()) + if (SDValue NewSHL = visitShiftByConstant(N, N1C)) + return NewSHL; + return SDValue(); } @@ -4583,11 +4703,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); - if (N1C && !N1C->isOpaque()) { - SDValue NewSRA = visitShiftByConstant(N, N1C); - if (NewSRA.getNode()) + if (N1C && !N1C->isOpaque()) + if (SDValue NewSRA = visitShiftByConstant(N, N1C)) return NewSRA; - } return SDValue(); } @@ -4744,8 +4862,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { - SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); - if (NewOp1.getNode()) + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1); } @@ -4754,15 +4871,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - if (N1C && !N1C->isOpaque()) { - SDValue NewSRL = visitShiftByConstant(N, N1C); - if (NewSRL.getNode()) + if (N1C && !N1C->isOpaque()) + if (SDValue NewSRL = visitShiftByConstant(N, N1C)) return NewSRL; - } // Attempt to convert a srl of a load into a narrower zero-extending load. 
- SDValue NarrowLoad = ReduceLoadWidth(N); - if (NarrowLoad.getNode()) + if (SDValue NarrowLoad = ReduceLoadWidth(N)) return NarrowLoad; // Here is a common situation. We want to optimize: @@ -4973,70 +5087,47 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (SimplifySelectOps(N, N1, N2)) return SDValue(N, 0); // Don't revisit N. - // fold selects based on a setcc into other things, such as min/max/abs - if (N0.getOpcode() == ISD::SETCC) { - // select x, y (fcmp lt x, y) -> fminnum x, y - // select x, y (fcmp gt x, y) -> fmaxnum x, y - // - // This is OK if we don't care about what happens if either operand is a - // NaN. - // - - // FIXME: Instead of testing for UnsafeFPMath, this should be checking for - // no signed zeros as well as no nans. - const TargetOptions &Options = DAG.getTarget().Options; - if (Options.UnsafeFPMath && - VT.isFloatingPoint() && N0.hasOneUse() && - DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { - ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - - SDValue FMinMax = - combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), - N1, N2, CC, TLI, DAG); - if (FMinMax) - return FMinMax; - } - - if ((!LegalOperations && - TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || - TLI.isOperationLegal(ISD::SELECT_CC, VT)) - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - N1, N2, N0.getOperand(2)); - return SimplifySelect(SDLoc(N), N0, N1, N2); - } - if (VT0 == MVT::i1) { - if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { - // select (and Cond0, Cond1), X, Y - // -> select Cond0, (select Cond1, X, Y), Y - if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { - SDValue Cond0 = N0->getOperand(0); - SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); + // The code in this block deals with the following 2 equivalences: + // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y)) + // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y) + // The target can specify its prefered form with the + // shouldNormalizeToSelectSequence() callback. However we always transform + // to the right anyway if we find the inner select exists in the DAG anyway + // and we always transform to the left side if we know that we can further + // optimize the combination of the conditions. 
+ bool normalizeToSequence + = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); + // select (and Cond0, Cond1), X, Y + // -> select Cond0, (select Cond1, X, Y), Y + if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + if (normalizeToSequence || !InnerSelect.use_empty()) return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, InnerSelect, N2); - } - // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) - if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { - SDValue Cond0 = N0->getOperand(0); - SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); + } + // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) + if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + if (normalizeToSequence || !InnerSelect.use_empty()) return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, InnerSelect); - } } // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y - if (N1->getOpcode() == ISD::SELECT) { + if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) { SDValue N1_0 = N1->getOperand(0); SDValue N1_1 = N1->getOperand(1); SDValue N1_2 = N1->getOperand(2); if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { // Create the actual and node if we can generate good code for it. - if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + if (!normalizeToSequence) { SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), N0, N1_0); return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, @@ -5049,13 +5140,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } } // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y - if (N2->getOpcode() == ISD::SELECT) { + if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) { SDValue N2_0 = N2->getOperand(0); SDValue N2_1 = N2->getOperand(1); SDValue N2_2 = N2->getOperand(2); if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { // Create the actual or node if we can generate good code for it. - if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + if (!normalizeToSequence) { SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), N0, N2_0); return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, @@ -5069,6 +5160,38 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } } + // fold selects based on a setcc into other things, such as min/max/abs + if (N0.getOpcode() == ISD::SETCC) { + // select x, y (fcmp lt x, y) -> fminnum x, y + // select x, y (fcmp gt x, y) -> fmaxnum x, y + // + // This is OK if we don't care about what happens if either operand is a + // NaN. + // + + // FIXME: Instead of testing for UnsafeFPMath, this should be checking for + // no signed zeros as well as no nans. 
+ const TargetOptions &Options = DAG.getTarget().Options; + if (Options.UnsafeFPMath && + VT.isFloatingPoint() && N0.hasOneUse() && + DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + + if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), + N0.getOperand(1), N1, N2, CC, + TLI, DAG)) + return FMinMax; + } + + if ((!LegalOperations && + TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || + TLI.isOperationLegal(ISD::SELECT_CC, VT)) + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + N1, N2, N0.getOperand(2)); + return SimplifySelect(SDLoc(N), N0, N1, N2); + } + return SDValue(); } @@ -5523,8 +5646,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (N1.getOpcode() == ISD::CONCAT_VECTORS && N2.getOpcode() == ISD::CONCAT_VECTORS && ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { - SDValue CV = ConvertSelectToConcatVector(N, DAG); - if (CV.getNode()) + if (SDValue CV = ConvertSelectToConcatVector(N, DAG)) return CV; } @@ -5580,7 +5702,20 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { SDLoc(N)); } -/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or +SDValue DAGCombiner::visitSETCCE(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + + // If Carry is false, fold to a regular SETCC. + if (Carry.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond); + + return SDValue(); +} + +/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or /// a build_vector of constants. /// This function is called by the DAGCombiner when visiting sext/zext/aext /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). 
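The two select equivalences that visitSELECT above normalizes between can be checked exhaustively at the source level. A minimal standalone C++ sketch (illustration only, not DAG code; sel() is a made-up stand-in for ISD::SELECT):

#include <cassert>

// Checks the equivalences visitSELECT relies on:
//   select(C0 | C1, x, y) == select(C0, x, select(C1, x, y))
//   select(C0 & C1, x, y) == select(C0, select(C1, x, y), y)
static int sel(bool C, int X, int Y) { return C ? X : Y; }

int main() {
  for (int C0 = 0; C0 < 2; ++C0)
    for (int C1 = 0; C1 < 2; ++C1) {
      const int X = 10, Y = 20;
      assert(sel(C0 | C1, X, Y) == sel(C0, X, sel(C1, X, Y)));
      assert(sel(C0 & C1, X, Y) == sel(C0, sel(C1, X, Y), Y));
    }
  return 0;
}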
@@ -5837,8 +5972,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::TRUNCATE) { // fold (sext (truncate (load x))) -> (sext (smaller load x)) // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode* oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -6024,7 +6158,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (!VT.isVector()) { EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType()); - if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) { + if (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) { SDLoc DL(N); ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); SDValue SetCC = DAG.getSetCC(DL, SetCCVT, @@ -6120,8 +6255,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode* oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -6133,32 +6267,45 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } // fold (zext (truncate x)) -> (and x, mask) - if (N0.getOpcode() == ISD::TRUNCATE && - (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { - + if (N0.getOpcode() == ISD::TRUNCATE) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { - SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { + SDNode *oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorklist(oye); } - return SDValue(N, 0); // Return N so it doesn't get rechecked! + return SDValue(N, 0); // Return N so it doesn't get rechecked! } - SDValue Op = N0.getOperand(0); - if (Op.getValueType().bitsLT(VT)) { - Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); - AddToWorklist(Op.getNode()); - } else if (Op.getValueType().bitsGT(VT)) { - Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); - AddToWorklist(Op.getNode()); + EVT SrcVT = N0.getOperand(0).getValueType(); + EVT MinVT = N0.getValueType(); + + // Try to mask before the extension to avoid having to generate a larger mask, + // possibly over several sub-vectors. 
+ if (SrcVT.bitsLT(VT)) { + if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) && + TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) { + SDValue Op = N0.getOperand(0); + Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); + AddToWorklist(Op.getNode()); + return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); + } + } + + if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) { + SDValue Op = N0.getOperand(0); + if (SrcVT.bitsLT(VT)) { + Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); + AddToWorklist(Op.getNode()); + } else if (SrcVT.bitsGT(VT)) { + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); + AddToWorklist(Op.getNode()); + } + return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); } - return DAG.getZeroExtendInReg(Op, SDLoc(N), - N0.getValueType().getScalarType()); } // Fold (zext (and (trunc x), cst)) -> (and x, cst), @@ -6219,6 +6366,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (and/or/xor (load x), cst)) -> // (and/or/xor (zextload x), (zext cst)) + // Unless (and (load x) cst) will match as a zextload already and has + // additional users. if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::XOR) && isa<LoadSDNode>(N0.getOperand(0)) && @@ -6229,9 +6378,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; - if (!N0.hasOneUse()) - DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, - SetCCs, TLI); + if (!N0.hasOneUse()) { + if (N0.getOpcode() == ISD::AND) { + auto *AndC = cast<ConstantSDNode>(N0.getOperand(1)); + auto NarrowLoad = false; + EVT LoadResultTy = AndC->getValueType(0); + EVT ExtVT, LoadedVT; + if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT, + NarrowLoad)) + DoXform = false; + } + if (DoXform) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), + ISD::ZERO_EXTEND, SetCCs, TLI); + } if (DoXform) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), @@ -6378,8 +6538,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext (truncate (load x))) -> (aext (smaller load x)) // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode* oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -6546,8 +6705,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { // Watch out for shift count overflow though. 
if (Amt >= Mask.getBitWidth()) break; APInt NewMask = Mask << Amt; - SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); - if (SimplifyLHS.getNode()) + if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask)) return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, V.getOperand(1)); } @@ -6736,8 +6894,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { unsigned VTBits = VT.getScalarType().getSizeInBits(); unsigned EVTBits = EVT.getScalarType().getSizeInBits(); + if (N0.isUndef()) + return DAG.getUNDEF(VT); + // fold (sext_in_reg c1) -> c1 - if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. @@ -6771,8 +6932,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // fold (sext_in_reg (load x)) -> (smaller sextload x) // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) - SDValue NarrowLoad = ReduceLoadWidth(N); - if (NarrowLoad.getNode()) + if (SDValue NarrowLoad = ReduceLoadWidth(N)) return NarrowLoad; // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) @@ -6831,29 +6991,6 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { BSwap, N1); } - // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs - // into a build_vector. - if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { - SmallVector<SDValue, 8> Elts; - unsigned NumElts = N0->getNumOperands(); - unsigned ShAmt = VTBits - EVTBits; - - for (unsigned i = 0; i != NumElts; ++i) { - SDValue Op = N0->getOperand(i); - if (Op->getOpcode() == ISD::UNDEF) { - Elts.push_back(Op); - continue; - } - - ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); - const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); - Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), - SDLoc(Op), Op.getValueType())); - } - - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts); - } - return SDValue(); } @@ -6999,9 +7136,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { - SDValue Reduced = ReduceLoadWidth(N); - if (Reduced.getNode()) + if (SDValue Reduced = ReduceLoadWidth(N)) return Reduced; + // Handle the case where the load remains an extending load even // after truncation. if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { @@ -7107,6 +7244,12 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { return SDValue(); } +static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) { + // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi + // and Lo parts; on big-endian machines it doesn't. + return DAG.getDataLayout().isBigEndian() ? 
1 : 0; +} + SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7173,6 +7316,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) + // + // For ppc_fp128: + // fold (bitcast (fneg x)) -> + // flipbit = signbit + // (xor (bitcast x) (build_pair flipbit, flipbit)) + // fold (bitcast (fabs x)) -> + // flipbit = (and (extract_element (bitcast x), 0), signbit) + // (xor (bitcast x) (build_pair flipbit, flipbit)) // This often reduces constant pool loads. if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && @@ -7183,6 +7334,29 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { AddToWorklist(NewConv.getNode()); SDLoc DL(N); + if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { + assert(VT.getSizeInBits() == 128); + SDValue SignBit = DAG.getConstant( + APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); + SDValue FlipBit; + if (N0.getOpcode() == ISD::FNEG) { + FlipBit = SignBit; + AddToWorklist(FlipBit.getNode()); + } else { + assert(N0.getOpcode() == ISD::FABS); + SDValue Hi = + DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv, + DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), + SDLoc(NewConv))); + AddToWorklist(Hi.getNode()); + FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit); + AddToWorklist(FlipBit.getNode()); + } + SDValue FlipBits = + DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); + AddToWorklist(FlipBits.getNode()); + return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits); + } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) return DAG.getNode(ISD::XOR, DL, VT, @@ -7196,6 +7370,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // (or (and (bitconvert x), sign), (and cst, (not sign))) // Note that we don't handle (copysign x, cst) because this can always be // folded to an fneg or fabs. 
+ // + // For ppc_fp128: + // fold (bitcast (fcopysign cst, x)) -> + // flipbit = (and (extract_element + // (xor (bitcast cst), (bitcast x)), 0), + // signbit) + // (xor (bitcast cst) (build_pair flipbit, flipbit)) if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() && !VT.isVector()) { @@ -7224,6 +7405,30 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { AddToWorklist(X.getNode()); } + if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { + APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2); + SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT, + N0.getOperand(0)); + AddToWorklist(Cst.getNode()); + SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT, + N0.getOperand(1)); + AddToWorklist(X.getNode()); + SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X); + AddToWorklist(XorResult.getNode()); + SDValue XorResult64 = DAG.getNode( + ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult, + DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), + SDLoc(XorResult))); + AddToWorklist(XorResult64.getNode()); + SDValue FlipBit = + DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64, + DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64)); + AddToWorklist(FlipBit.getNode()); + SDValue FlipBits = + DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); + AddToWorklist(FlipBits.getNode()); + return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits); + } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, SDLoc(X), VT)); @@ -7240,11 +7445,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { } // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. - if (N0.getOpcode() == ISD::BUILD_PAIR) { - SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT); - if (CombineLD.getNode()) + if (N0.getOpcode() == ISD::BUILD_PAIR) + if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT)) return CombineLD; - } // Remove double bitcasts from shuffles - this is often a legacy of // XformToShuffleWithZero being used to combine bitmaskings (of @@ -7257,10 +7460,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0); // If operands are a bitcast, peek through if it casts the original VT. - // If operands are a UNDEF or constant, just bitcast back to original VT. + // If operands are a constant, just bitcast back to original VT. auto PeekThroughBitcast = [&](SDValue Op) { if (Op.getOpcode() == ISD::BITCAST && - Op.getOperand(0)->getValueType(0) == VT) + Op.getOperand(0).getValueType() == VT) return SDValue(Op.getOperand(0)); if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) @@ -7431,28 +7634,34 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { SDLoc SL(N); const TargetOptions &Options = DAG.getTarget().Options; - bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath); + bool AllowFusion = + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && - TLI.isOperationLegal(ISD::FMAD, VT)); + bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); // Floating-point multiply-add without intermediate rounding. 
- bool HasFMA = ((!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && - UnsafeFPMath); + bool HasFMA = + AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // No valid opcode, do not combine. if (!HasFMAD && !HasFMA) return SDValue(); // Always prefer FMAD to FMA for precision. - unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); bool LookThroughFPExt = TLI.isFPExtFree(VT); + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (Aggressive && N0.getOpcode() == ISD::FMUL && + N1.getOpcode() == ISD::FMUL) { + if (N0.getNode()->use_size() > N1.getNode()->use_size()) + std::swap(N0, N1); + } + // fold (fadd (fmul x, y), z) -> (fma x, y, z) if (N0.getOpcode() == ISD::FMUL && (Aggressive || N0->hasOneUse())) { @@ -7469,7 +7678,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // Look through FP_EXTEND nodes to do more combining. - if (UnsafeFPMath && LookThroughFPExt) { + if (AllowFusion && LookThroughFPExt) { // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); @@ -7495,7 +7704,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if ((UnsafeFPMath || HasFMAD) && Aggressive) { + if ((AllowFusion || HasFMAD) && Aggressive) { // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) if (N0.getOpcode() == PreferredFusedOpcode && N0.getOperand(2).getOpcode() == ISD::FMUL) { @@ -7518,7 +7727,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { N0)); } - if (UnsafeFPMath && LookThroughFPExt) { + if (AllowFusion && LookThroughFPExt) { // fold (fadd (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y, (fma (fpext u), (fpext v), z)) auto FoldFAddFMAFPExtFMul = [&] ( @@ -7608,25 +7817,23 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { SDLoc SL(N); const TargetOptions &Options = DAG.getTarget().Options; - bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath); + bool AllowFusion = + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && - TLI.isOperationLegal(ISD::FMAD, VT)); + bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); // Floating-point multiply-add without intermediate rounding. - bool HasFMA = ((!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && - UnsafeFPMath); + bool HasFMA = + AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // No valid opcode, do not combine. if (!HasFMAD && !HasFMA) return SDValue(); // Always prefer FMAD to FMA for precision. - unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); bool LookThroughFPExt = TLI.isFPExtFree(VT); @@ -7659,7 +7866,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // Look through FP_EXTEND nodes to do more combining. 
- if (UnsafeFPMath && LookThroughFPExt) { + if (AllowFusion && LookThroughFPExt) { // fold (fsub (fpext (fmul x, y)), z) // -> (fma (fpext x), (fpext y), (fneg z)) if (N0.getOpcode() == ISD::FP_EXTEND) { @@ -7735,7 +7942,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if ((UnsafeFPMath || HasFMAD) && Aggressive) { + if ((AllowFusion || HasFMAD) && Aggressive) { // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) if (N0.getOpcode() == PreferredFusedOpcode && @@ -7765,7 +7972,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N21, N0)); } - if (UnsafeFPMath && LookThroughFPExt) { + if (AllowFusion && LookThroughFPExt) { // fold (fsub (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) if (N0.getOpcode() == PreferredFusedOpcode) { @@ -7866,14 +8073,97 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { return SDValue(); } +/// Try to perform FMA combining on a given FMUL node. +SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc SL(N); + + assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation"); + + const TargetOptions &Options = DAG.getTarget().Options; + bool AllowFusion = + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); + + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = + AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + // Always prefer FMAD to FMA for precision. + unsigned PreferredFusedOpcode = HasFMAD ? 
ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + + // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y) + // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y)) + auto FuseFADD = [&](SDValue X, SDValue Y) { + if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) { + auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); + if (XC1 && XC1->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); + if (XC1 && XC1->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y)); + } + return SDValue(); + }; + + if (SDValue FMA = FuseFADD(N0, N1)) + return FMA; + if (SDValue FMA = FuseFADD(N1, N0)) + return FMA; + + // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y) + // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y)) + // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y)) + // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y) + auto FuseFSUB = [&](SDValue X, SDValue Y) { + if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) { + auto XC0 = isConstOrConstSplatFP(X.getOperand(0)); + if (XC0 && XC0->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, + Y); + if (XC0 && XC0->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y)); + + auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); + if (XC1 && XC1->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y)); + if (XC1 && XC1->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); + } + return SDValue(); + }; + + if (SDValue FMA = FuseFSUB(N0, N1)) + return FMA; + if (SDValue FMA = FuseFSUB(N1, N0)) + return FMA; + + return SDValue(); +} + SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0); + bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; // fold vector ops if (VT.isVector()) @@ -7882,23 +8172,23 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FADD, DL, VT, N0, N1); + return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FADD, DL, VT, N1, N0); + return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags); // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), Flags); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, DL, VT, N1, - GetNegatedExpression(N0, DAG, LegalOperations)); + GetNegatedExpression(N0, DAG, LegalOperations), 
Flags); // If 'unsafe math' is enabled, fold lots of things. if (Options.UnsafeFPMath) { @@ -7907,14 +8197,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { bool AllowNewConst = (Level < AfterLegalizeDAG); // fold (fadd A, 0) -> A - if (N1CFP && N1CFP->isZero()) - return N0; + if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) + if (N1C->isZero()) + return N0; // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && - isa<ConstantFPSDNode>(N0.getOperand(1))) + isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), - DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1)); + DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, + Flags), + Flags); // If allowed, fold (fadd (fneg x), x) -> 0.0 if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) @@ -7929,64 +8222,64 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // of rounding steps. if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { if (N0.getOpcode() == ISD::FMUL) { - ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); - ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); + bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), - DAG.getConstantFP(1.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), + DAG.getConstantFP(1.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags); } // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), - DAG.getConstantFP(2.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), + DAG.getConstantFP(2.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags); } } if (N1.getOpcode() == ISD::FMUL) { - ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); - ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); + bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), - DAG.getConstantFP(1.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), + DAG.getConstantFP(1.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags); } // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N0.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), - DAG.getConstantFP(2.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), + DAG.getConstantFP(2.0, DL, 
VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags); } } if (N0.getOpcode() == ISD::FADD && AllowNewConst) { - ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); + bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) - if (!CFP && N0.getOperand(0) == N0.getOperand(1) && + if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && (N0.getOperand(0) == N1)) { return DAG.getNode(ISD::FMUL, DL, VT, - N1, DAG.getConstantFP(3.0, DL, VT)); + N1, DAG.getConstantFP(3.0, DL, VT), Flags); } } if (N1.getOpcode() == ISD::FADD && AllowNewConst) { - ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); + bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && N1.getOperand(0) == N0) { return DAG.getNode(ISD::FMUL, DL, VT, - N0, DAG.getConstantFP(3.0, DL, VT)); + N0, DAG.getConstantFP(3.0, DL, VT), Flags); } } @@ -7996,15 +8289,14 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - return DAG.getNode(ISD::FMUL, DL, VT, - N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), + DAG.getConstantFP(4.0, DL, VT), Flags); } } } // enable-unsafe-fp-math // FADD -> FMA combines: - SDValue Fused = visitFADDForFMACombine(N); - if (Fused) { + if (SDValue Fused = visitFADDForFMACombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } @@ -8020,6 +8312,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { EVT VT = N->getValueType(0); SDLoc dl(N); const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; // fold vector ops if (VT.isVector()) @@ -8028,12 +8321,12 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, dl, VT, N0, N1); + return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags); // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return DAG.getNode(ISD::FADD, dl, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), Flags); // If 'unsafe math' is enabled, fold lots of things. 
if (Options.UnsafeFPMath) { @@ -8068,8 +8361,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // FSUB -> FMA combines: - SDValue Fused = visitFSUBForFMACombine(N); - if (Fused) { + if (SDValue Fused = visitFSUBForFMACombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } @@ -8085,6 +8377,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; // fold vector ops if (VT.isVector()) { @@ -8095,12 +8388,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FMUL, DL, VT, N0, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags); // canonicalize constant to RHS if (isConstantFPBuildVectorOrConstantFP(N0) && !isConstantFPBuildVectorOrConstantFP(N1)) - return DAG.getNode(ISD::FMUL, DL, VT, N1, N0); + return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); // fold (fmul A, 1.0) -> A if (N1CFP && N1CFP->isExactlyValue(1.0)) @@ -8129,8 +8422,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // the second operand of the outer multiply are constants. if ((N1CFP && isConstOrConstSplatFP(N01)) || (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { - SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1); - return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags); } } } @@ -8139,16 +8432,18 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs // during an early run of DAGCombiner can prevent folding with fmuls // inserted during lowering. - if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) { + if (N0.getOpcode() == ISD::FADD && + (N0.getOperand(0) == N0.getOperand(1)) && + N0.hasOneUse()) { const SDValue Two = DAG.getConstantFP(2.0, DL, VT); - SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1); - return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags); } } // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) - return DAG.getNode(ISD::FADD, DL, VT, N0, N0); + return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags); // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) @@ -8163,10 +8458,17 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (LHSNeg == 2 || RHSNeg == 2) return DAG.getNode(ISD::FMUL, DL, VT, GetNegatedExpression(N0, DAG, LegalOperations), - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), + Flags); } } + // FMUL -> FMA combines: + if (SDValue Fused = visitFMULForFMACombine(N)) { + AddToWorklist(Fused.getNode()); + return Fused; + } + return SDValue(); } @@ -8193,66 +8495,145 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N1CFP && N1CFP->isZero()) return N2; } + // TODO: The FMA node should have flags that propagate to these nodes. 
if (N0CFP && N0CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); if (N1CFP && N1CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); // Canonicalize (fma c, x, y) -> (fma x, c, y) - if (N0CFP && !N1CFP) + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); - // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) - if (Options.UnsafeFPMath && N1CFP && - N2.getOpcode() == ISD::FMUL && - N0 == N2.getOperand(0) && - N2.getOperand(1).getOpcode() == ISD::ConstantFP) { - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1))); - } + // TODO: FMA nodes should have flags that propagate to the created nodes. + // For now, create a Flags object for use with all unsafe math transforms. + SDNodeFlags Flags; + Flags.setUnsafeAlgebra(true); + if (Options.UnsafeFPMath) { + // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) + if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && + isConstantFPBuildVectorOrConstantFP(N1) && + isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1), + &Flags), &Flags); + } - // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) - if (Options.UnsafeFPMath && - N0.getOpcode() == ISD::FMUL && N1CFP && - N0.getOperand(1).getOpcode() == ISD::ConstantFP) { - return DAG.getNode(ISD::FMA, dl, VT, - N0.getOperand(0), - DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)), - N2); + // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) + if (N0.getOpcode() == ISD::FMUL && + isConstantFPBuildVectorOrConstantFP(N1) && + isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { + return DAG.getNode(ISD::FMA, dl, VT, + N0.getOperand(0), + DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1), + &Flags), + N2); + } } // (fma x, 1, y) -> (fadd x, y) // (fma x, -1, y) -> (fadd (fneg x), y) if (N1CFP) { if (N1CFP->isExactlyValue(1.0)) + // TODO: The FMA node should have flags that propagate to this node. return DAG.getNode(ISD::FADD, dl, VT, N0, N2); if (N1CFP->isExactlyValue(-1.0) && (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); AddToWorklist(RHSNeg.getNode()); + // TODO: The FMA node should have flags that propagate to this node. return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); } } - // (fma x, c, x) -> (fmul x, (c+1)) - if (Options.UnsafeFPMath && N1CFP && N0 == N2) - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(1.0, dl, VT))); - - // (fma x, c, (fneg x)) -> (fmul x, (c-1)) - if (Options.UnsafeFPMath && N1CFP && - N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) + if (Options.UnsafeFPMath) { + // (fma x, c, x) -> (fmul x, (c+1)) + if (N1CFP && N0 == N2) { return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(-1.0, dl, VT))); + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(1.0, dl, VT), + &Flags), &Flags); + } + // (fma x, c, (fneg x)) -> (fmul x, (c-1)) + if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(-1.0, dl, VT), + &Flags), &Flags); + } + } return SDValue(); } +// Combine multiple FDIVs with the same divisor into multiple FMULs by the +// reciprocal. 
+// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) +// Notice that this is not always beneficial. One reason is different target +// may have different costs for FDIV and FMUL, so sometimes the cost of two +// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason +// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". +SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { + bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; + const SDNodeFlags *Flags = N->getFlags(); + if (!UnsafeMath && !Flags->hasAllowReciprocal()) + return SDValue(); + + // Skip if current node is a reciprocal. + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + if (N0CFP && N0CFP->isExactlyValue(1.0)) + return SDValue(); + + // Exit early if the target does not want this transform or if there can't + // possibly be enough uses of the divisor to make the transform worthwhile. + SDValue N1 = N->getOperand(1); + unsigned MinUses = TLI.combineRepeatedFPDivisors(); + if (!MinUses || N1->use_size() < MinUses) + return SDValue(); + + // Find all FDIV users of the same divisor. + // Use a set because duplicates may be present in the user list. + SetVector<SDNode *> Users; + for (auto *U : N1->uses()) { + if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) { + // This division is eligible for optimization only if global unsafe math + // is enabled or if this division allows reciprocal formation. + if (UnsafeMath || U->getFlags()->hasAllowReciprocal()) + Users.insert(U); + } + } + + // Now that we have the actual number of divisor uses, make sure it meets + // the minimum threshold specified by the target. + if (Users.size() < MinUses) + return SDValue(); + + EVT VT = N->getValueType(0); + SDLoc DL(N); + SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); + SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags); + + // Dividend / Divisor -> Dividend * Reciprocal + for (auto *U : Users) { + SDValue Dividend = U->getOperand(0); + if (Dividend != FPOne) { + SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, + Reciprocal, Flags); + CombineTo(U, NewNode); + } else if (U != Reciprocal.getNode()) { + // In the absence of fast-math-flags, this user node is always the + // same node as Reciprocal, but with FMF they may be different nodes. + CombineTo(U, Reciprocal); + } + } + return SDValue(N, 0); // N was replaced. +} + SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -8261,6 +8642,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; + SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; // fold vector ops if (VT.isVector()) @@ -8269,7 +8651,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // fold (fdiv c1, c2) -> c1/c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags); if (Options.UnsafeFPMath) { // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. 
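The comment on combineRepeatedFPDivisors describes a rewrite that is easy to picture outside the DAG. A rough standalone C++ sketch of the before and after shapes (assumed example code, not from the patch; the actual transform only fires under unsafe math or allow-reciprocal fast-math flags, and only once the divisor has at least TLI.combineRepeatedFPDivisors() FDIV users):

#include <cstdio>

// Before: one FDIV per element.
static void divideEach(const double *In, double *Out, int N, double D) {
  for (int I = 0; I < N; ++I)
    Out[I] = In[I] / D;
}

// After: one FDIV to form the reciprocal, then one FMUL per element.
// Not bit-exact with true division, hence the fast-math requirement.
static void divideEachViaReciprocal(const double *In, double *Out, int N,
                                    double D) {
  const double Recip = 1.0 / D;
  for (int I = 0; I < N; ++I)
    Out[I] = In[I] * Recip;
}

int main() {
  const double In[3] = {2.0, 4.0, 8.0};
  double A[3], B[3];
  divideEach(In, A, 3, 2.0);
  divideEachViaReciprocal(In, B, 3, 2.0);
  for (int I = 0; I < 3; ++I)
    std::printf("%g %g\n", A[I], B[I]);
  return 0;
}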
@@ -8288,28 +8670,30 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || TLI.isFPImmLegal(Recip, VT))) return DAG.getNode(ISD::FMUL, DL, VT, N0, - DAG.getConstantFP(Recip, DL, VT)); + DAG.getConstantFP(Recip, DL, VT), Flags); } // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) { - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) { + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + Flags)) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + Flags)) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FMUL) { // Look through an FMUL. Even though this won't remove the FDIV directly, @@ -8326,18 +8710,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (SqrtOp.getNode()) { // We found a FSQRT, so try to make this fold: // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) - if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) { - RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp); + if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { + RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags); AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } } // Fold into a reciprocal estimate and multiply instead of a real divide. - if (SDValue RV = BuildReciprocalEstimate(N1)) { + if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) { AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } @@ -8349,52 +8733,13 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (LHSNeg == 2 || RHSNeg == 2) return DAG.getNode(ISD::FDIV, SDLoc(N), VT, GetNegatedExpression(N0, DAG, LegalOperations), - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), + Flags); } } - // Combine multiple FDIVs with the same divisor into multiple FMULs by the - // reciprocal. - // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) - // Notice that this is not always beneficial. One reason is different target - // may have different costs for FDIV and FMUL, so sometimes the cost of two - // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason - // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". - if (Options.UnsafeFPMath) { - // Skip if current node is a reciprocal. - if (N0CFP && N0CFP->isExactlyValue(1.0)) - return SDValue(); - - // Find all FDIV users of the same divisor. 
- // Use a set because duplicates may be present in the user list. - SetVector<SDNode *> Users; - for (auto *U : N1->uses()) - if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) - Users.insert(U); - - if (TLI.combineRepeatedFPDivisors(Users.size())) { - SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); - // FIXME: This optimization requires some level of fast-math, so the - // created reciprocal node should at least have the 'allowReciprocal' - // fast-math-flag set. - SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1); - - // Dividend / Divisor -> Dividend * Reciprocal - for (auto *U : Users) { - SDValue Dividend = U->getOperand(0); - if (Dividend != FPOne) { - SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, - Reciprocal); - CombineTo(U, NewNode); - } else if (U != Reciprocal.getNode()) { - // In the absence of fast-math-flags, this user node is always the - // same node as Reciprocal, but with FMF they may be different nodes. - CombineTo(U, Reciprocal); - } - } - return SDValue(N, 0); // N was replaced. - } - } + if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N)) + return CombineRepeatedDivisors; return SDValue(); } @@ -8408,7 +8753,8 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP) - return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, + &cast<BinaryWithFlagsSDNode>(N)->Flags); return SDValue(); } @@ -8417,20 +8763,25 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap()) return SDValue(); + // TODO: FSQRT nodes should have flags that propagate to the created nodes. + // For now, create a Flags object for use with all unsafe math transforms. + SDNodeFlags Flags; + Flags.setUnsafeAlgebra(true); + // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) - SDValue RV = BuildRsqrtEstimate(N->getOperand(0)); + SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags); if (!RV) return SDValue(); - + EVT VT = RV.getValueType(); SDLoc DL(N); - RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); + RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags); AddToWorklist(RV.getNode()); // Unfortunately, RV is now NaN if the input was exactly 0. // Select out this case and force the answer to 0. SDValue Zero = DAG.getConstantFP(0.0, DL, VT); - EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + EVT CCVT = getSetCCResultType(VT); SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ); AddToWorklist(ZeroCmp.getNode()); AddToWorklist(RV.getNode()); @@ -8439,6 +8790,22 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { ZeroCmp, Zero, RV); } +static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { + // copysign(x, fp_extend(y)) -> copysign(x, y) + // copysign(x, fp_round(y)) -> copysign(x, y) + // Do not optimize out type conversion of f128 type yet. + // For some target like x86_64, configuration is changed + // to keep one f128 value in one SSE register, but + // instruction selection cannot handle FCOPYSIGN on + // SSE registers yet. 
+ SDValue N1 = N->getOperand(1); + EVT N1VT = N1->getValueType(0); + EVT N1Op0VT = N1->getOperand(0)->getValueType(0); + return (N1.getOpcode() == ISD::FP_EXTEND || + N1.getOpcode() == ISD::FP_ROUND) && + (N1VT == N1Op0VT || N1Op0VT != MVT::f128); +} + SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -8482,7 +8849,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(x, fp_extend(y)) -> copysign(x, y) // copysign(x, fp_round(y)) -> copysign(x, y) - if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) + if (CanCombineFCOPYSIGN_EXTEND_ROUND(N)) return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); @@ -8837,11 +9204,12 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { APFloat CVal = CFP1->getValueAPF(); CVal.changeSign(); if (Level >= AfterLegalizeDAG && - (TLI.isFPImmLegal(CVal, N->getValueType(0)) || - TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0)))) - return DAG.getNode( - ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1))); + (TLI.isFPImmLegal(CVal, VT) || + TLI.isOperationLegal(ISD::ConstantFP, VT))) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N0.getOperand(1)), + &cast<BinaryWithFlagsSDNode>(N0)->Flags); } } @@ -8851,20 +9219,20 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue DAGCombiner::visitFMINNUM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); + const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0)); + return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT); } - if (N0CFP) { - EVT VT = N->getValueType(0); - // Canonicalize to constant on RHS. + // Canonicalize to constant on RHS. + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0); - } return SDValue(); } @@ -8872,20 +9240,20 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) { SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); + const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0)); + return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT); } - if (N0CFP) { - EVT VT = N->getValueType(0); - // Canonicalize to constant on RHS. + // Canonicalize to constant on RHS. + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0); - } return SDValue(); } @@ -9034,8 +9402,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { SDValue Op1 = TheXor->getOperand(1); if (Op0.getOpcode() == Op1.getOpcode()) { // Avoid missing important xor optimizations. 
- SDValue Tmp = visitXOR(TheXor); - if (Tmp.getNode()) { + if (SDValue Tmp = visitXOR(TheXor)) { if (Tmp.getNode() != TheXor) { DEBUG(dbgs() << "\nReplacing.8 "; TheXor->dump(&DAG); @@ -9722,8 +10089,8 @@ struct LoadedSlice { void addSliceGain(const LoadedSlice &LS) { // Each slice saves a truncate. const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); - if (!TLI.isTruncateFree(LS.Inst->getValueType(0), - LS.Inst->getOperand(0).getValueType())) + if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(), + LS.Inst->getValueType(0))) ++Truncates; // If there is a shift amount, this slice gets rid of it. if (LS.Shift) @@ -10625,30 +10992,109 @@ struct BaseIndexOffset { }; } // namespace +// This is a helper function for visitMUL to check the profitability +// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). +// MulNode is the original multiply, AddNode is (add x, c1), +// and ConstNode is c2. +// +// If the (add x, c1) has multiple uses, we could increase +// the number of adds if we make this transformation. +// It would only be worth doing this if we can remove a +// multiply in the process. Check for that here. +// To illustrate: +// (A + c1) * c3 +// (A + c2) * c3 +// We're checking for cases where we have common "c3 * A" expressions. +bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, + SDValue &AddNode, + SDValue &ConstNode) { + APInt Val; + + // If the add only has one use, this would be OK to do. + if (AddNode.getNode()->hasOneUse()) + return true; + + // Walk all the users of the constant with which we're multiplying. + for (SDNode *Use : ConstNode->uses()) { + + if (Use == MulNode) // This use is the one we're on right now. Skip it. + continue; + + if (Use->getOpcode() == ISD::MUL) { // We have another multiply use. + SDNode *OtherOp; + SDNode *MulVar = AddNode.getOperand(0).getNode(); + + // OtherOp is what we're multiplying against the constant. + if (Use->getOperand(0) == ConstNode) + OtherOp = Use->getOperand(1).getNode(); + else + OtherOp = Use->getOperand(0).getNode(); + + // Check to see if multiply is with the same operand of our "add". + // + // ConstNode = CONST + // Use = ConstNode * A <-- visiting Use. OtherOp is A. + // ... + // AddNode = (A + c1) <-- MulVar is A. + // = AddNode * ConstNode <-- current visiting instruction. + // + // If we make this transformation, we will have a common + // multiply (ConstNode * A) that we can save. + if (OtherOp == MulVar) + return true; + + // Now check to see if a future expansion will give us a common + // multiply. + // + // ConstNode = CONST + // AddNode = (A + c1) + // ... = AddNode * ConstNode <-- current visiting instruction. + // ... + // OtherOp = (A + c2) + // Use = OtherOp * ConstNode <-- visiting Use. + // + // If we make this transformation, we will have a common + // multiply (CONST * A) after we also do the same transformation + // to the "t2" instruction. + if (OtherOp->getOpcode() == ISD::ADD && + isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && + OtherOp->getOperand(0).getNode() == MulVar) + return true; + } + } + + // Didn't find a case where this would be profitable. 
+ return false; +} + SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, SDLoc SL, ArrayRef<MemOpLink> Stores, + SmallVectorImpl<SDValue> &Chains, EVT Ty) const { SmallVector<SDValue, 8> BuildVector; - for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) - BuildVector.push_back(cast<StoreSDNode>(Stores[I].MemNode)->getValue()); + for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) { + StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode); + Chains.push_back(St->getChain()); + BuildVector.push_back(St->getValue()); + } return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector); } bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, - unsigned NumElem, bool IsConstantSrc, bool UseVector) { + unsigned NumStores, bool IsConstantSrc, bool UseVector) { // Make sure we have something to merge. - if (NumElem < 2) + if (NumStores < 2) return false; int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned LatestNodeUsed = 0; - for (unsigned i=0; i < NumElem; ++i) { + for (unsigned i=0; i < NumStores; ++i) { // Find a chain for the new wide-store operand. Notice that some // of the store nodes that we found may not be selected for inclusion // in the wide store. The chain we use needs to be the chain of the @@ -10657,45 +11103,57 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( LatestNodeUsed = i; } + SmallVector<SDValue, 8> Chains; + // The latest Node in the DAG. LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; SDLoc DL(StoreNodes[0].MemNode); SDValue StoredVal; if (UseVector) { - // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + bool IsVec = MemVT.isVector(); + unsigned Elts = NumStores; + if (IsVec) { + // When merging vector stores, get the total number of elements. + Elts *= MemVT.getVectorNumElements(); + } + // Get the type for the merged vector store. + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); + if (IsConstantSrc) { - StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty); + StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty); } else { SmallVector<SDValue, 8> Ops; - for (unsigned i = 0; i < NumElem ; ++i) { + for (unsigned i = 0; i < NumStores; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); SDValue Val = St->getValue(); - // All of the operands of a BUILD_VECTOR must have the same type. + // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type. if (Val.getValueType() != MemVT) return false; Ops.push_back(Val); + Chains.push_back(St->getChain()); } // Build the extracted vector elements back into a vector. - StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops); - } + StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, + DL, Ty, Ops); } } else { // We should always use a vector store when merging extracted vector // elements, so this path implies a store of constants. assert(IsConstantSrc && "Merged vector elements should use vector store"); - unsigned SizeInBits = NumElem * ElementSizeBytes * 8; + unsigned SizeInBits = NumStores * ElementSizeBytes * 8; APInt StoreInt(SizeInBits, 0); // Construct a single integer constant which is made of the smaller // constant inputs. bool IsLE = DAG.getDataLayout().isLittleEndian(); - for (unsigned i = 0; i < NumElem ; ++i) { - unsigned Idx = IsLE ? 
(NumElem - 1 - i) : i; + for (unsigned i = 0; i < NumStores; ++i) { + unsigned Idx = IsLE ? (NumStores - 1 - i) : i; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); + Chains.push_back(St->getChain()); + SDValue Val = St->getValue(); StoreInt <<= ElementSizeBytes * 8; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { @@ -10712,7 +11170,10 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); } - SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal, + assert(!Chains.empty()); + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, @@ -10721,7 +11182,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // Replace the last store with the new store CombineTo(LatestOp, NewStore); // Erase all other stores. - for (unsigned i = 0; i < NumElem ; ++i) { + for (unsigned i = 0; i < NumStores; ++i) { if (StoreNodes[i].MemNode == LatestOp) continue; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); @@ -10743,17 +11204,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( return true; } -static bool allowableAlignment(const SelectionDAG &DAG, - const TargetLowering &TLI, EVT EVTTy, - unsigned AS, unsigned Align) { - if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align)) - return true; - - Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DAG.getDataLayout().getPrefTypeAlignment(Ty); - return (Align >= ABIAlignment); -} - void DAGCombiner::getStoreMergeAndAliasCandidates( StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes, SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) { @@ -10775,6 +11225,38 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( EVT MemVT = St->getMemoryVT(); unsigned Seq = 0; StoreSDNode *Index = St; + + + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + + if (UseAA) { + // Look at other users of the same chain. Stores on the same chain do not + // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized + // to be on the same chain, so don't bother looking at adjacent chains. + + SDValue Chain = St->getChain(); + for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) { + if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) { + if (I.getOperandNo() != 0) + continue; + + if (OtherST->isVolatile() || OtherST->isIndexed()) + continue; + + if (OtherST->getMemoryVT() != MemVT) + continue; + + BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr()); + + if (Ptr.equalBaseIndex(BasePtr)) + StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++)); + } + } + + return; + } + while (Index) { // If the chain has more than one use, then we can't reorder the mem ops. if (Index != St && !SDValue(Index, 0)->hasOneUse()) @@ -10800,6 +11282,13 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( if (Index->getMemoryVT() != MemVT) break; + // We do not allow under-aligned stores in order to prevent + // overriding stores. NOTE: this is a bad hack. Alignment SHOULD + // be irrelevant here; what MATTERS is that we not move memory + // operations that potentially overlap past each-other. + if (Index->getAlignment() < MemVT.getStoreSize()) + break; + // We found a potential memory operand to merge. 
StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); @@ -10844,8 +11333,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) return false; - // Don't merge vectors into wider inputs. - if (MemVT.isVector() || !MemVT.isSimple()) + if (!MemVT.isSimple()) return false; // Perform an early exit check. Do not bother looking at stored values that @@ -10854,9 +11342,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { bool IsLoadSrc = isa<LoadSDNode>(StoredVal); bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || isa<ConstantFPSDNode>(StoredVal); - bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); + bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT || + StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR); - if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc) + return false; + + // Don't merge vectors into wider vectors if the source data comes from loads. + // TODO: This restriction can be lifted by using logic similar to the + // ExtractVecSrc case. + if (MemVT.isVector() && IsLoadSrc) return false; // Only look at ends of store sequences. @@ -10868,22 +11363,28 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // We need to make sure that these nodes do not interfere with // any of the store nodes. SmallVector<LSBaseSDNode*, 8> AliasLoadNodes; - + // Save the StoreSDNodes that we find in the chain. SmallVector<MemOpLink, 8> StoreNodes; getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes); - + // Check if there is anything to merge. if (StoreNodes.size() < 2) return false; - // Sort the memory operands according to their distance from the base pointer. + // Sort the memory operands according to their distance from the + // base pointer. As a secondary criteria: make sure stores coming + // later in the code come first in the list. This is important for + // the non-UseAA case, because we're merging stores into the FINAL + // store along a chain which potentially contains aliasing stores. + // Thus, if there are multiple stores to the same address, the last + // one can be considered for merging but not the others. std::sort(StoreNodes.begin(), StoreNodes.end(), [](MemOpLink LHS, MemOpLink RHS) { return LHS.OffsetFromBase < RHS.OffsetFromBase || (LHS.OffsetFromBase == RHS.OffsetFromBase && - LHS.SequenceNum > RHS.SequenceNum); + LHS.SequenceNum < RHS.SequenceNum); }); // Scan the memory operations on the chain and find the first non-consecutive @@ -10900,15 +11401,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { break; } - bool Alias = false; // Check if this store interferes with any of the loads that we found. - for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld) - if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) { - Alias = true; - break; - } - // We found a load that alias with this store. Stop the sequence. - if (Alias) + // If we find a load that alias with this store. Stop the sequence. + if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(), + [&](LSBaseSDNode* Ldn) { + return isAlias(Ldn, StoreNodes[i].MemNode); + })) break; // Mark this node as useful. 
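To make the candidate selection above easier to follow, here is a minimal stand-alone sketch of the same idea: sort the candidate stores by their offset from the shared base pointer (breaking ties by sequence number), then keep the longest prefix whose offsets are exactly consecutive. The MemOp struct and countConsecutive helper are illustrative stand-ins for the real MemOpLink/StoreSDNode machinery, not a faithful reimplementation.

#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for MemOpLink: offset from the common base pointer,
// plus the order in which the store was discovered on the chain.
struct MemOp {
  int64_t OffsetFromBase;
  unsigned SequenceNum;
};

// Sort by offset (sequence number breaks ties), then return how many stores,
// starting from the lowest offset, form a run of exactly consecutive
// ElementSize-byte stores -- the prefix that is a candidate for merging.
unsigned countConsecutive(std::vector<MemOp> &Stores, int64_t ElementSize) {
  std::sort(Stores.begin(), Stores.end(), [](const MemOp &L, const MemOp &R) {
    return L.OffsetFromBase < R.OffsetFromBase ||
           (L.OffsetFromBase == R.OffsetFromBase &&
            L.SequenceNum < R.SequenceNum);
  });
  if (Stores.empty())
    return 0;
  unsigned Run = 1;
  int64_t Start = Stores[0].OffsetFromBase;
  for (unsigned i = 1, e = Stores.size(); i != e; ++i) {
    if (Stores[i].OffsetFromBase - Start != ElementSize * (int64_t)i)
      break;
    Run = i + 1;
  }
  return Run;
}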
@@ -10919,6 +11417,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); unsigned FirstStoreAlign = FirstInChain->getAlignment(); + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); // Store the constants into memory as one consecutive store. if (IsConstantSrc) { @@ -10940,43 +11440,40 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Find a legal type for the constant store. unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; - EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); + EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); + bool IsFast; if (TLI.isTypeLegal(StoreTy) && - allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, - FirstStoreAlign)) { + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) { LastLegalType = i+1; // Or check whether a truncstore is legal. - } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + } else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = - TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); + TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, - FirstStoreAlign)) { + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstStoreAS, FirstStoreAlign, &IsFast) && + IsFast) { LastLegalType = i + 1; } } - // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); - if (TLI.isTypeLegal(Ty) && - allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) { - LastLegalVectorType = i + 1; + // We only use vectors if the constant is known to be zero or the target + // allows it and the function is not marked with the noimplicitfloat + // attribute. + if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1, + FirstStoreAS)) && + !NoVectors) { + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(Context, MemVT, i+1); + if (TLI.isTypeLegal(Ty) && + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) + LastLegalVectorType = i + 1; } } - - // We only use vectors if the constant is known to be zero or the target - // allows it and the function is not marked with the noimplicitfloat - // attribute. - if (NoVectors) { - LastLegalVectorType = 0; - } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT, - LastLegalVectorType, - FirstStoreAS)) { - LastLegalVectorType = 0; - } - // Check if we found a legal integer type to store. if (LastLegalType == 0 && LastLegalVectorType == 0) return false; @@ -10990,27 +11487,36 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // When extracting multiple vector elements, try to store them // in one vector store rather than a sequence of scalar stores. - if (IsExtractVecEltSrc) { - unsigned NumElem = 0; + if (IsExtractVecSrc) { + unsigned NumStoresToMerge = 0; + bool IsVec = MemVT.isVector(); for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - SDValue StoredVal = St->getValue(); + unsigned StoreValOpcode = St->getValue().getOpcode(); // This restriction could be loosened. // Bail out if any stored values are not elements extracted from a vector. 
// It should be possible to handle mixed sources, but load sources need // more careful handling (see the block of code below that handles // consecutive loads). - if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT && + StoreValOpcode != ISD::EXTRACT_SUBVECTOR) return false; // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + unsigned Elts = i + 1; + if (IsVec) { + // When merging vector stores, get the total number of elements. + Elts *= MemVT.getVectorNumElements(); + } + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); + bool IsFast; if (TLI.isTypeLegal(Ty) && - allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) - NumElem = i + 1; + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) + NumStoresToMerge = i + 1; } - return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge, false, true); } @@ -11084,7 +11590,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { StartAddress = LoadNodes[0].OffsetFromBase; SDValue FirstChain = FirstLoad->getChain(); for (unsigned i = 1; i < LoadNodes.size(); ++i) { - // All loads much share the same chain. + // All loads must share the same chain. if (LoadNodes[i].MemNode->getChain() != FirstChain) break; @@ -11092,35 +11598,41 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (CurrAddress - StartAddress != (ElementSizeBytes * i)) break; LastConsecutiveLoad = i; - // Find a legal type for the vector store. - EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1); + bool IsFastSt, IsFastLd; if (TLI.isTypeLegal(StoreTy) && - allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && - allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) { + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && IsFastLd) { LastLegalVectorType = i + 1; } // Find a legal type for the integer store. unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; - StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); + StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && - allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && - allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && IsFastLd) LastLegalIntegerType = i + 1; // Or check whether a truncstore and extload is legal. 
- else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = - TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); + TLI.getTypeToTransformTo(Context, StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && - allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, - FirstStoreAlign) && - allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS, - FirstLoadAlign)) + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstStoreAS, FirstStoreAlign, &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstLoadAS, FirstLoadAlign, &IsFastLd) && + IsFastLd) LastLegalIntegerType = i+1; } } @@ -11138,6 +11650,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (NumElem < 2) return false; + // Collect the chains from all merged stores. + SmallVector<SDValue, 8> MergeStoreChains; + MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain()); + // The latest Node in the DAG. unsigned LatestNodeUsed = 0; for (unsigned i=1; i<NumElem; ++i) { @@ -11147,6 +11663,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // latest store node which is *used* and replaced by the wide store. if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) LatestNodeUsed = i; + + MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain()); } LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; @@ -11155,34 +11673,33 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // to memory. EVT JointMemOpVT; if (UseVectorTy) { - JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem); } else { unsigned SizeInBits = NumElem * ElementSizeBytes * 8; - JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); + JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); } SDLoc LoadDL(LoadNodes[0].MemNode); SDLoc StoreDL(StoreNodes[0].MemNode); + // The merged loads are required to have the same incoming chain, so + // using the first's chain is acceptable. SDValue NewLoad = DAG.getLoad( JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign); + SDValue NewStoreChain = + DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains); + SDValue NewStore = DAG.getStore( - LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(), + NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, FirstStoreAlign); - // Replace one of the loads with the new load. - LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), - SDValue(NewLoad.getNode(), 1)); - - // Remove the rest of the load chains. - for (unsigned i = 1; i < NumElem ; ++i) { - // Replace all chain users of the old load nodes with the chain of the new - // load node. + // Transfer chain users from old loads to the new load. 
+ for (unsigned i = 0; i < NumElem; ++i) { LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), + SDValue(NewLoad.getNode(), 1)); } // Replace the last store with the new store. @@ -11200,6 +11717,114 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { return true; } +SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) { + SDLoc SL(ST); + SDValue ReplStore; + + // Replace the chain to avoid dependency. + if (ST->isTruncatingStore()) { + ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(), + ST->getBasePtr(), ST->getMemoryVT(), + ST->getMemOperand()); + } else { + ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(), + ST->getMemOperand()); + } + + // Create token to keep both nodes around. + SDValue Token = DAG.getNode(ISD::TokenFactor, SL, + MVT::Other, ST->getChain(), ReplStore); + + // Make sure the new and old chains are cleaned up. + AddToWorklist(Token.getNode()); + + // Don't add users to work list. + return CombineTo(ST, Token, false); +} + +SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { + SDValue Value = ST->getValue(); + if (Value.getOpcode() == ISD::TargetConstantFP) + return SDValue(); + + SDLoc DL(ST); + + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + + const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value); + + // NOTE: If the original store is volatile, this transform must not increase + // the number of stores. For example, on x86-32 an f64 can be stored in one + // processor operation but an i64 (which is not legal) requires two. So the + // transform should not be done in this case. + + SDValue Tmp; + switch (CFP->getSimpleValueType(0).SimpleTy) { + default: + llvm_unreachable("Unknown FP type"); + case MVT::f16: // We don't do this for these yet. + case MVT::f80: + case MVT::f128: + case MVT::ppcf128: + return SDValue(); + case MVT::f32: + if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + ; + Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). + bitcastToAPInt().getZExtValue(), SDLoc(CFP), + MVT::i32); + return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand()); + } + + return SDValue(); + case MVT::f64: + if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && + !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { + ; + Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + getZExtValue(), SDLoc(CFP), MVT::i64); + return DAG.getStore(Chain, DL, Tmp, + Ptr, ST->getMemOperand()); + } + + if (!ST->isVolatile() && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + // Many FP stores are not made apparent until after legalize, e.g. for + // argument passing. Since this is so common, custom legalize the + // 64-bit integer store into two 32-bit stores. 
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); + SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + AAMDNodes AAInfo = ST->getAAInfo(); + + SDValue St0 = DAG.getStore(Chain, DL, Lo, + Ptr, ST->getPointerInfo(), + isVolatile, isNonTemporal, + ST->getAlignment(), AAInfo); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4, DL, Ptr.getValueType())); + Alignment = MinAlign(Alignment, 4U); + SDValue St1 = DAG.getStore(Chain, DL, Hi, + Ptr, ST->getPointerInfo().getWithOffset(4), + isVolatile, isNonTemporal, + Alignment, AAInfo); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + St0, St1); + } + + return SDValue(); + } +} + SDValue DAGCombiner::visitSTORE(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); SDValue Chain = ST->getChain(); @@ -11227,81 +11852,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) return Chain; - // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) { - // NOTE: If the original store is volatile, this transform must not increase - // the number of stores. For example, on x86-32 an f64 can be stored in one - // processor operation but an i64 (which is not legal) requires two. So the - // transform should not be done in this case. - if (Value.getOpcode() != ISD::TargetConstantFP) { - SDValue Tmp; - switch (CFP->getSimpleValueType(0).SimpleTy) { - default: llvm_unreachable("Unknown FP type"); - case MVT::f16: // We don't do this for these yet. - case MVT::f80: - case MVT::f128: - case MVT::ppcf128: - break; - case MVT::f32: - if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { - ; - Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). - bitcastToAPInt().getZExtValue(), SDLoc(CFP), - MVT::i32); - return DAG.getStore(Chain, SDLoc(N), Tmp, - Ptr, ST->getMemOperand()); - } - break; - case MVT::f64: - if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && - !ST->isVolatile()) || - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { - ; - Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). - getZExtValue(), SDLoc(CFP), MVT::i64); - return DAG.getStore(Chain, SDLoc(N), Tmp, - Ptr, ST->getMemOperand()); - } - - if (!ST->isVolatile() && - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { - // Many FP stores are not made apparent until after legalize, e.g. for - // argument passing. Since this is so common, custom legalize the - // 64-bit integer store into two 32-bit stores. 
- uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); - SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); - if (DAG.getDataLayout().isBigEndian()) - std::swap(Lo, Hi); - - unsigned Alignment = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); - AAMDNodes AAInfo = ST->getAAInfo(); - - SDLoc DL(N); - - SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, - Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, - ST->getAlignment(), AAInfo); - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(4, DL, Ptr.getValueType())); - Alignment = MinAlign(Alignment, 4U); - SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, - Ptr, ST->getPointerInfo().getWithOffset(4), - isVolatile, isNonTemporal, - Alignment, AAInfo); - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - St0, St1); - } - - break; - } - } - } - // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { @@ -11319,8 +11869,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try transforming a pair floating point load / store ops to integer // load / store ops. - SDValue NewST = TransformFPLoadStorePair(N); - if (NewST.getNode()) + if (SDValue NewST = TransformFPLoadStorePair(N)) return NewST; bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA @@ -11331,31 +11880,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { UseAA = false; #endif if (UseAA && ST->isUnindexed()) { - // Walk up chain skipping non-aliasing memory nodes. - SDValue BetterChain = FindBetterChain(N, Chain); - - // If there is a better chain. - if (Chain != BetterChain) { - SDValue ReplStore; - - // Replace the chain to avoid dependency. - if (ST->isTruncatingStore()) { - ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr, - ST->getMemoryVT(), ST->getMemOperand()); - } else { - ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr, - ST->getMemOperand()); - } + // FIXME: We should do this even without AA enabled. AA will just allow + // FindBetterChain to work in more situations. The problem with this is that + // any combine that expects memory operations to be on consecutive chains + // first needs to be updated to look for users of the same chain. - // Create token to keep both nodes around. - SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), - MVT::Other, Chain, ReplStore); - - // Make sure the new and old chains are cleaned up. - AddToWorklist(Token.getNode()); - - // Don't add users to work list. - return CombineTo(N, Token, false); + // Walk up chain skipping non-aliasing memory nodes, on this store and any + // adjacent stores. + if (findBetterNeighborChains(ST)) { + // replaceStoreChain uses CombineTo, which handled all of the worklist + // manipulation. Return the original node to not do anything else. + return SDValue(ST, 0); } } @@ -11440,6 +11975,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return SDValue(N, 0); } + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + // + // Make sure to do this only after attempting to merge stores in order to + // avoid changing the types of some subset of stores due to visit order, + // preventing their merging. 
+ if (isa<ConstantFPSDNode>(Value)) { + if (SDValue NewSt = replaceStoreOfFPConstant(ST)) + return NewSt; + } + return ReduceLoadOpStoreWidth(N); } @@ -11613,7 +12158,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } SDValue EltNo = N->getOperand(1); - bool ConstEltNo = isa<ConstantSDNode>(EltNo); + ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); + + // extract_vector_elt (build_vector x, y), 1 -> y + if (ConstEltNo && + InVec.getOpcode() == ISD::BUILD_VECTOR && + TLI.isTypeLegal(VT) && + (InVec.hasOneUse() || + TLI.aggressivelyPreferBuildVectorSources(VT))) { + SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue()); + EVT InEltVT = Elt.getValueType(); + + // Sometimes build_vector's scalar input types do not match result type. + if (NVT == InEltVT) + return Elt; + + // TODO: It may be useful to truncate if free if the build_vector implicitly + // converts. + } // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. // We only perform this optimization before the op legalization phase because @@ -11621,13 +12183,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // patterns. For example on AVX, extracting elements from a wide vector // without using extract_subvector. However, if we can find an underlying // scalar value, then we can always use that. - if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE - && ConstEltNo) { - int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) { int NumElem = VT.getVectorNumElements(); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); // Find the new index to extract from. - int OrigElt = SVOp->getMaskElt(Elt); + int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue()); // Extracting an undef index is undef. if (OrigElt == -1) @@ -12183,12 +12743,90 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops)); } -SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { - // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of - // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector - // inputs come from at most two distinct vectors, turn this into a shuffle - // node. +// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR +// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at +// most two distinct vectors the same size as the result, attempt to turn this +// into a legal shuffle. +static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + EVT OpVT = N->getOperand(0).getValueType(); + int NumElts = VT.getVectorNumElements(); + int NumOpElts = OpVT.getVectorNumElements(); + + SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT); + SmallVector<int, 8> Mask; + + for (SDValue Op : N->ops()) { + // Peek through any bitcast. + while (Op.getOpcode() == ISD::BITCAST) + Op = Op.getOperand(0); + + // UNDEF nodes convert to UNDEF shuffle mask values. + if (Op.getOpcode() == ISD::UNDEF) { + Mask.append((unsigned)NumOpElts, -1); + continue; + } + + if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return SDValue(); + + // What vector are we extracting the subvector from and at what index? + SDValue ExtVec = Op.getOperand(0); + + // We want the EVT of the original extraction to correctly scale the + // extraction index. + EVT ExtVT = ExtVec.getValueType(); + + // Peek through any bitcast. 
+ while (ExtVec.getOpcode() == ISD::BITCAST) + ExtVec = ExtVec.getOperand(0); + + // UNDEF nodes convert to UNDEF shuffle mask values. + if (ExtVec.getOpcode() == ISD::UNDEF) { + Mask.append((unsigned)NumOpElts, -1); + continue; + } + + if (!isa<ConstantSDNode>(Op.getOperand(1))) + return SDValue(); + int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + + // Ensure that we are extracting a subvector from a vector the same + // size as the result. + if (ExtVT.getSizeInBits() != VT.getSizeInBits()) + return SDValue(); + + // Scale the subvector index to account for any bitcast. + int NumExtElts = ExtVT.getVectorNumElements(); + if (0 == (NumExtElts % NumElts)) + ExtIdx /= (NumExtElts / NumElts); + else if (0 == (NumElts % NumExtElts)) + ExtIdx *= (NumElts / NumExtElts); + else + return SDValue(); + // At most we can reference 2 inputs in the final shuffle. + if (SV0.getOpcode() == ISD::UNDEF || SV0 == ExtVec) { + SV0 = ExtVec; + for (int i = 0; i != NumOpElts; ++i) + Mask.push_back(i + ExtIdx); + } else if (SV1.getOpcode() == ISD::UNDEF || SV1 == ExtVec) { + SV1 = ExtVec; + for (int i = 0; i != NumOpElts; ++i) + Mask.push_back(i + ExtIdx + NumElts); + } else { + return SDValue(); + } + } + + if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT)) + return SDValue(); + + return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), + DAG.getBitcast(VT, SV1), Mask); +} + +SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // If we only have one input vector, we don't need to do any concatenation. if (N->getNumOperands() == 1) return N->getOperand(0); @@ -12289,6 +12927,11 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (SDValue V = combineConcatVectorOfScalars(N, DAG)) return V; + // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) + if (SDValue V = combineConcatVectorOfExtracts(N, DAG)) + return V; + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -12503,7 +13146,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { std::all_of(SVN->getMask().begin() + NumElemsPerConcat, SVN->getMask().end(), [](int i) { return i == -1; })) { N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), - ArrayRef<int>(SVN->getMask().begin(), NumElemsPerConcat)); + makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat)); N1 = DAG.getUNDEF(ConcatVT); return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); } @@ -12981,6 +13624,21 @@ SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) + if (N0->getOpcode() == ISD::AND) { + ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1)); + if (AndConst && AndConst->getAPIntValue() == 0xffff) { + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), + N0.getOperand(0)); + } + } + + return SDValue(); +} + /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle /// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. 
==> @@ -13002,34 +13660,76 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { if (RHS.getOpcode() == ISD::BITCAST) RHS = RHS.getOperand(0); - if (RHS.getOpcode() == ISD::BUILD_VECTOR) { + if (RHS.getOpcode() != ISD::BUILD_VECTOR) + return SDValue(); + + EVT RVT = RHS.getValueType(); + unsigned NumElts = RHS.getNumOperands(); + + // Attempt to create a valid clear mask, splitting the mask into + // sub elements and checking to see if each is + // all zeros or all ones - suitable for shuffle masking. + auto BuildClearMask = [&](int Split) { + int NumSubElts = NumElts * Split; + int NumSubBits = RVT.getScalarSizeInBits() / Split; + SmallVector<int, 8> Indices; - unsigned NumElts = RHS.getNumOperands(); + for (int i = 0; i != NumSubElts; ++i) { + int EltIdx = i / Split; + int SubIdx = i % Split; + SDValue Elt = RHS.getOperand(EltIdx); + if (Elt.getOpcode() == ISD::UNDEF) { + Indices.push_back(-1); + continue; + } - for (unsigned i = 0; i != NumElts; ++i) { - SDValue Elt = RHS.getOperand(i); - if (isAllOnesConstant(Elt)) + APInt Bits; + if (isa<ConstantSDNode>(Elt)) + Bits = cast<ConstantSDNode>(Elt)->getAPIntValue(); + else if (isa<ConstantFPSDNode>(Elt)) + Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt(); + else + return SDValue(); + + // Extract the sub element from the constant bit mask. + if (DAG.getDataLayout().isBigEndian()) { + Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits); + } else { + Bits = Bits.lshr(SubIdx * NumSubBits); + } + + if (Split > 1) + Bits = Bits.trunc(NumSubBits); + + if (Bits.isAllOnesValue()) Indices.push_back(i); - else if (isNullConstant(Elt)) - Indices.push_back(NumElts+i); + else if (Bits == 0) + Indices.push_back(i + NumSubElts); else return SDValue(); } // Let's see if the target supports this vector_shuffle. - EVT RVT = RHS.getValueType(); - if (!TLI.isVectorClearMaskLegal(Indices, RVT)) + EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits); + EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts); + if (!TLI.isVectorClearMaskLegal(Indices, ClearVT)) return SDValue(); - // Return the new VECTOR_SHUFFLE node. - EVT EltVT = RVT.getVectorElementType(); - SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), - DAG.getConstant(0, dl, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps); - LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); - SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); - return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); - } + SDValue Zero = DAG.getConstant(0, dl, ClearVT); + return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl, + DAG.getBitcast(ClearVT, LHS), + Zero, &Indices[0])); + }; + + // Determine maximum split level (byte level masking). + int MaxSplit = 1; + if (RVT.getScalarSizeInBits() % 8 == 0) + MaxSplit = RVT.getScalarSizeInBits() / 8; + + for (int Split = 1; Split <= MaxSplit; ++Split) + if (RVT.getScalarSizeInBits() % Split == 0) + if (SDValue S = BuildClearMask(Split)) + return S; return SDValue(); } @@ -13041,60 +13741,17 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); + SDValue Ops[] = {LHS, RHS}; + // See if we can constant fold the vector operation. + if (SDValue Fold = DAG.FoldConstantVectorArithmetic( + N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) + return Fold; + + // Try to convert a constant mask AND into a shuffle clear mask. 
if (SDValue Shuffle = XformToShuffleWithZero(N)) return Shuffle; - // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold - // this operation. - if (LHS.getOpcode() == ISD::BUILD_VECTOR && - RHS.getOpcode() == ISD::BUILD_VECTOR) { - // Check if both vectors are constants. If not bail out. - if (!(cast<BuildVectorSDNode>(LHS)->isConstant() && - cast<BuildVectorSDNode>(RHS)->isConstant())) - return SDValue(); - - SmallVector<SDValue, 8> Ops; - for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { - SDValue LHSOp = LHS.getOperand(i); - SDValue RHSOp = RHS.getOperand(i); - - // Can't fold divide by zero. - if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || - N->getOpcode() == ISD::FDIV) { - if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP && - cast<ConstantFPSDNode>(RHSOp.getNode())->isZero())) - break; - } - - EVT VT = LHSOp.getValueType(); - EVT RVT = RHSOp.getValueType(); - if (RVT != VT) { - // Integer BUILD_VECTOR operands may have types larger than the element - // size (e.g., when the element type is not legal). Prior to type - // legalization, the types may not match between the two BUILD_VECTORS. - // Truncate one of the operands to make them match. - if (RVT.getSizeInBits() > VT.getSizeInBits()) { - RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp); - } else { - LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp); - VT = RVT; - } - } - SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT, - LHSOp, RHSOp); - if (FoldOp.getOpcode() != ISD::UNDEF && - FoldOp.getOpcode() != ISD::Constant && - FoldOp.getOpcode() != ISD::ConstantFP) - break; - Ops.push_back(FoldOp); - AddToWorklist(FoldOp.getNode()); - } - - if (Ops.size() == LHS.getNumOperands()) - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops); - } - // Type legalization might introduce new shuffles in the DAG. // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask))) // -> (shuffle (VBinOp (A, B)), Undef, Mask). @@ -13109,7 +13766,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { EVT VT = N->getValueType(0); SDValue UndefVector = LHS.getOperand(1); SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, - LHS.getOperand(0), RHS.getOperand(0)); + LHS.getOperand(0), RHS.getOperand(0), + N->getFlags()); AddUsersToWorklist(N); return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, &SVN0->getMask()[0]); @@ -13390,9 +14048,10 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); AddToWorklist(CPIdx.getNode()); - return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, - false, false, Alignment); + return DAG.getLoad( + TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); } } @@ -13481,8 +14140,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // Get a SetCC of the condition // NOTE: Don't create a SETCC if it's not legal on this target. if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, - LegalTypes ? 
getSetCCResultType(N0.getValueType()) : MVT::i1)) { + TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) { SDValue Temp, SCC; // cast from setcc result type to select result type if (LegalTypes) { @@ -13514,51 +14172,6 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, } } - // Check to see if this is the equivalent of setcc - // FIXME: Turn all of these into setcc if setcc if setcc is legal - // otherwise, go ahead with the folds. - if (0 && isNullConstant(N3) && isOneConstant(N2)) { - EVT XType = N0.getValueType(); - if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) { - SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC); - if (Res.getValueType() != VT) - Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res); - return Res; - } - - // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X)))) - if (isNullConstant(N1) && CC == ISD::SETEQ && - (!LegalOperations || - TLI.isOperationLegal(ISD::CTLZ, XType))) { - SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0); - return DAG.getNode(ISD::SRL, DL, XType, Ctlz, - DAG.getConstant(Log2_32(XType.getSizeInBits()), - SDLoc(Ctlz), - getShiftAmountTy(Ctlz.getValueType()))); - } - // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) - if (isNullConstant(N1) && CC == ISD::SETGT) { - SDLoc DL(N0); - SDValue NegN0 = DAG.getNode(ISD::SUB, DL, - XType, DAG.getConstant(0, DL, XType), N0); - SDValue NotN0 = DAG.getNOT(DL, N0, XType); - return DAG.getNode(ISD::SRL, DL, XType, - DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), - DAG.getConstant(XType.getSizeInBits() - 1, DL, - getShiftAmountTy(XType))); - } - // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) - if (isAllOnesConstant(N1) && CC == ISD::SETGT) { - SDLoc DL(N0); - SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0, - DAG.getConstant(XType.getSizeInBits() - 1, DL, - getShiftAmountTy(N0.getValueType()))); - return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, DL, - XType)); - } - } - // Check to see if this is an integer abs. // select_cc setg[te] X, 0, X, -X -> // select_cc setgt X, -1, X, -X -> @@ -13666,7 +14279,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return S; } -SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { +SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -13690,16 +14303,16 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { // Newton iterations: Est = Est + Est (1 - Arg * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est); + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst); + Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); AddToWorklist(Est.getNode()); } } @@ -13716,31 +14329,32 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. 
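As a quick scalar sanity check of the iteration described above, here is a minimal numerical sketch using plain float arithmetic instead of SDNode construction; refineRsqrt and the chosen starting estimate are illustrative assumptions only.

#include <cmath>
#include <cstdio>

// Scalar model of the one-constant refinement
//   Est = Est * (1.5 - HalfArg * Est * Est),
// where HalfArg = 1.5 * A - A (== A / 2), so 1.5 is the only FP constant used.
float refineRsqrt(float A, float Est, unsigned Iterations) {
  const float ThreeHalves = 1.5f;
  float HalfArg = ThreeHalves * A - A; // A / 2, built from the same constant
  for (unsigned i = 0; i < Iterations; ++i)
    Est = Est * (ThreeHalves - HalfArg * Est * Est);
  return Est;
}

int main() {
  float A = 2.0f;
  float Est = 0.7f; // crude hardware-style starting estimate of 1/sqrt(2)
  Est = refineRsqrt(A, Est, 2);
  std::printf("refined = %f, reference = %f\n", Est, 1.0f / std::sqrt(A));
  return 0;
}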
SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, - unsigned Iterations) { + unsigned Iterations, + SDNodeFlags *Flags) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. - SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg); + SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); AddToWorklist(HalfArg.getNode()); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg); + HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); AddToWorklist(HalfArg.getNode()); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(Est.getNode()); } return Est; @@ -13752,7 +14366,8 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, - unsigned Iterations) { + unsigned Iterations, + SDNodeFlags *Flags) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); @@ -13760,25 +14375,25 @@ SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf); + SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); AddToWorklist(HalfEst.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); AddToWorklist(Est.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); AddToWorklist(Est.getNode()); - Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree); + Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags); AddToWorklist(Est.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags); AddToWorklist(Est.getNode()); } return Est; } -SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) { +SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -13790,8 +14405,8 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) { AddToWorklist(Est.getNode()); if (Iterations) { Est = UseOneConstNR ? 
- BuildRsqrtNROneConst(Op, Est, Iterations) : - BuildRsqrtNRTwoConst(Op, Est, Iterations); + BuildRsqrtNROneConst(Op, Est, Iterations, Flags) : + BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags); } return Est; } @@ -13955,14 +14570,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SDValue Chain = Chains.pop_back_val(); // For TokenFactor nodes, look at each operand and only continue up the - // chain until we find two aliases. If we've seen two aliases, assume we'll - // find more and revert to original chain since the xform is unlikely to be - // profitable. + // chain until we reach the depth limit. // // FIXME: The depth check could be made to return the last non-aliasing // chain we found before we hit a tokenfactor rather than the original // chain. - if (Depth > 6 || Aliases.size() == 2) { + if (Depth > TLI.getGatherAllAliasesMaxDepth()) { Aliases.clear(); Aliases.push_back(OriginalChain); return; @@ -14094,6 +14707,83 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } +bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { + // This holds the base pointer, index, and the offset in bytes from the base + // pointer. + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + + // We must have a base and an offset. + if (!BasePtr.Base.getNode()) + return false; + + // Do not handle stores to undef base pointers. + if (BasePtr.Base.getOpcode() == ISD::UNDEF) + return false; + + SmallVector<StoreSDNode *, 8> ChainedStores; + ChainedStores.push_back(St); + + // Walk up the chain and look for nodes with offsets from the same + // base pointer. Stop when reaching an instruction with a different kind + // or instruction which has a different base pointer. + StoreSDNode *Index = St; + while (Index) { + // If the chain has more than one use, then we can't reorder the mem ops. + if (Index != St && !SDValue(Index, 0)->hasOneUse()) + break; + + if (Index->isVolatile() || Index->isIndexed()) + break; + + // Find the base pointer and offset for this memory node. + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); + + // Check that the base pointer is the same as the original one. + if (!Ptr.equalBaseIndex(BasePtr)) + break; + + // Find the next memory operand in the chain. If the next operand in the + // chain is a store then move up and continue the scan with the next + // memory operand. If the next operand is a load save it and use alias + // information to check if it interferes with anything. + SDNode *NextInChain = Index->getChain().getNode(); + while (true) { + if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { + // We found a store node. Use it for the next iteration. + ChainedStores.push_back(STn); + Index = STn; + break; + } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { + NextInChain = Ldn->getChain().getNode(); + continue; + } else { + Index = nullptr; + break; + } + } + } + + bool MadeChange = false; + SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains; + + for (StoreSDNode *ChainedStore : ChainedStores) { + SDValue Chain = ChainedStore->getChain(); + SDValue BetterChain = FindBetterChain(ChainedStore, Chain); + + if (Chain != BetterChain) { + MadeChange = true; + BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); + } + } + + // Do all replacements after finding the replacements to make to avoid making + // the chains more complicated by introducing new TokenFactors. 
+ for (auto Replacement : BetterChains) + replaceStoreChain(Replacement.first, Replacement.second); + + return MadeChange; +} + /// This is the entry point for the file. void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, CodeGenOpt::Level OptLevel) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 2b9ba2c1b534..cfbb20947acc 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -118,9 +118,9 @@ bool FastISel::lowerArguments() { for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), E = FuncInfo.Fn->arg_end(); I != E; ++I) { - DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I); + DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(&*I); assert(VI != LocalValueMap.end() && "Missed an argument?"); - FuncInfo.ValueMap[I] = VI->second; + FuncInfo.ValueMap[&*I] = VI->second; } return true; } @@ -611,7 +611,7 @@ bool FastISel::selectStackmap(const CallInst *I) { // have to worry about calling conventions and target-specific lowering code. // Instead we perform the call lowering right here. // - // CALLSEQ_START(0) + // CALLSEQ_START(0...) // STACKMAP(id, nbytes, ...) // CALLSEQ_END(0, 0) // @@ -647,8 +647,11 @@ bool FastISel::selectStackmap(const CallInst *I) { // Issue CALLSEQ_START unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) - .addImm(0); + auto Builder = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)); + const MCInstrDesc &MCID = Builder.getInstr()->getDesc(); + for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I) + Builder.addImm(0); // Issue STACKMAP. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -1100,13 +1103,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // The donothing intrinsic does, well, nothing. case Intrinsic::donothing: return true; - case Intrinsic::eh_actions: { - unsigned ResultReg = getRegForValue(UndefValue::get(II->getType())); - if (!ResultReg) - return false; - updateValueMap(II, ResultReg); - return true; - } case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); assert(DI->getVariable() && "Missing variable"); @@ -1326,12 +1322,38 @@ bool FastISel::selectBitCast(const User *I) { return true; } +// Remove local value instructions starting from the instruction after +// SavedLastLocalValue to the current function insert point. +void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) +{ + MachineInstr *CurLastLocalValue = getLastLocalValue(); + if (CurLastLocalValue != SavedLastLocalValue) { + // Find the first local value instruction to be deleted. + // This is the instruction after SavedLastLocalValue if it is non-NULL. + // Otherwise it's the first instruction in the block. + MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue); + if (SavedLastLocalValue) + ++FirstDeadInst; + else + FirstDeadInst = FuncInfo.MBB->getFirstNonPHI(); + setLastLocalValue(SavedLastLocalValue); + removeDeadCode(FirstDeadInst, FuncInfo.InsertPt); + } +} + bool FastISel::selectInstruction(const Instruction *I) { + MachineInstr *SavedLastLocalValue = getLastLocalValue(); // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. 
if (isa<TerminatorInst>(I)) - if (!handlePHINodesInSuccessorBlocks(I->getParent())) + if (!handlePHINodesInSuccessorBlocks(I->getParent())) { + // PHI node handling may have generated local value instructions, + // even though it failed to handle all PHI nodes. + // We remove these instructions because SelectionDAGISel will generate + // them again. + removeDeadLocalValueCode(SavedLastLocalValue); return false; + } DbgLoc = I->getDebugLoc(); @@ -1348,7 +1370,7 @@ bool FastISel::selectInstruction(const Instruction *I) { LibInfo->hasOptimizedCodeGen(Func)) return false; - // Don't handle Intrinsic::trap if a trap funciton is specified. + // Don't handle Intrinsic::trap if a trap function is specified. if (F && F->getIntrinsicID() == Intrinsic::trap && Call->hasFnAttr("trap-func-name")) return false; @@ -1380,8 +1402,12 @@ bool FastISel::selectInstruction(const Instruction *I) { DbgLoc = DebugLoc(); // Undo phi node updates, because they will be added again by SelectionDAG. - if (isa<TerminatorInst>(I)) + if (isa<TerminatorInst>(I)) { + // PHI node handling may have generated local value instructions. + // We remove them because SelectionDAGISel will generate them again. + removeDeadLocalValueCode(SavedLastLocalValue); FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); + } return false; } @@ -1398,11 +1424,30 @@ void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr, SmallVector<MachineOperand, 0>(), DbgLoc); } - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(FuncInfo.MBB->getBasicBlock(), - MSucc->getBasicBlock()); - FuncInfo.MBB->addSuccessor(MSucc, BranchWeight); + if (FuncInfo.BPI) { + auto BranchProbability = FuncInfo.BPI->getEdgeProbability( + FuncInfo.MBB->getBasicBlock(), MSucc->getBasicBlock()); + FuncInfo.MBB->addSuccessor(MSucc, BranchProbability); + } else + FuncInfo.MBB->addSuccessorWithoutProb(MSucc); +} + +void FastISel::finishCondBranch(const BasicBlock *BranchBB, + MachineBasicBlock *TrueMBB, + MachineBasicBlock *FalseMBB) { + // Add TrueMBB as successor unless it is equal to the FalseMBB: This can + // happen in degenerate IR and MachineIR forbids to have a block twice in the + // successor/predecessor lists. + if (TrueMBB != FalseMBB) { + if (FuncInfo.BPI) { + auto BranchProbability = + FuncInfo.BPI->getEdgeProbability(BranchBB, TrueMBB->getBasicBlock()); + FuncInfo.MBB->addSuccessor(TrueMBB, BranchProbability); + } else + FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB); + } + + fastEmitBranch(FalseMBB, DbgLoc); } /// Emit an FNeg operation. 
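Aside: the BuildRsqrtNROneConst / BuildRsqrtNRTwoConst hunks near the start of this section implement the standard Newton-Raphson refinement for 1/sqrt(A). A minimal scalar sketch of the two forms follows; it is illustrative only and not part of the patch, and the helper names and use of float are placeholders.

static float refineRsqrtOneConst(float A, float Est, unsigned Iterations) {
  const float ThreeHalves = 1.5f;
  // 0.5 * A written as (1.5 * A - A), so the sequence needs only one FP constant.
  float HalfA = ThreeHalves * A - A;
  for (unsigned i = 0; i < Iterations; ++i)
    Est = Est * (ThreeHalves - HalfA * Est * Est); // Est *= 1.5 - 0.5*A*Est^2
  return Est;
}

static float refineRsqrtTwoConst(float A, float Est, unsigned Iterations) {
  const float MinusThree = -3.0f;
  const float MinusHalf = -0.5f;
  for (unsigned i = 0; i < Iterations; ++i)
    Est = (MinusHalf * Est) * (A * Est * Est + MinusThree); // same iterate, two constants
  return Est;
}

Both loops compute the same iterate X_{i+1} = X_i * (1.5 - 0.5 * A * X_i^2); the SDNodeFlags parameter threaded through the DAG nodes in those hunks only propagates the original node's fast-math flags to the expanded sequence and does not change the arithmetic.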
@@ -1864,21 +1909,18 @@ unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_rf(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, unsigned Op0, - bool Op0IsKill, const ConstantFP *FPImm) { +unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + const ConstantFP *FPImm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); - Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill)) .addFPImm(FPImm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, getKillRegState(Op0IsKill)) .addFPImm(FPImm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); @@ -1912,35 +1954,6 @@ unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_rrii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, unsigned Op1, - bool Op1IsKill, uint64_t Imm1, - uint64_t Imm2) { - const MCInstrDesc &II = TII.get(MachineInstOpcode); - - unsigned ResultReg = createResultReg(RC); - Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); - Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); - - if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill)) - .addReg(Op1, getKillRegState(Op1IsKill)) - .addImm(Imm1) - .addImm(Imm2); - else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, getKillRegState(Op0IsKill)) - .addReg(Op1, getKillRegState(Op1IsKill)) - .addImm(Imm1) - .addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); - } - return ResultReg; -} - unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); @@ -1957,25 +1970,6 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_ii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, uint64_t Imm1, - uint64_t Imm2) { - unsigned ResultReg = createResultReg(RC); - const MCInstrDesc &II = TII.get(MachineInstOpcode); - - if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addImm(Imm1) - .addImm(Imm2); - else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1) - .addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); - } - return ResultReg; -} - unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index cc306cbf5ae4..b62bd2bd63ee 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -87,6 +87,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); MachineModuleInfo &MMI = 
MF->getMMI(); + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; @@ -103,28 +104,29 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) { - // Static allocas can be folded into the initial stack frame adjustment. - if (AI->isStaticAlloca()) { + Type *Ty = AI->getAllocatedType(); + unsigned Align = + std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), + AI->getAlignment()); + unsigned StackAlign = TFI->getStackAlignment(); + + // Static allocas can be folded into the initial stack frame + // adjustment. For targets that don't realign the stack, don't + // do this if there is an extra alignment requirement. + if (AI->isStaticAlloca() && + (TFI->isStackRealignable() || (Align <= StackAlign))) { const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize()); - Type *Ty = AI->getAllocatedType(); uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty); - unsigned Align = - std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), - AI->getAlignment()); TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. StaticAllocaMap[AI] = MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); - } else { - unsigned Align = - std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment( - AI->getAllocatedType()), - AI->getAlignment()); - unsigned StackAlign = - MF->getSubtarget().getFrameLowering()->getStackAlignment(); + // FIXME: Overaligned static allocas should be grouped into + // a single dynamic allocation instead of using a separate + // stack allocation for each one. if (Align <= StackAlign) Align = 0; // Inform the Frame Information that we have variable-sized objects. @@ -134,7 +136,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Look for inline asm that clobbers the SP register. if (isa<CallInst>(I) || isa<InvokeInst>(I)) { - ImmutableCallSite CS(I); + ImmutableCallSite CS(&*I); if (isa<InlineAsm>(CS.getCalledValue())) { unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); @@ -163,7 +165,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, MF->getFrameInfo()->setHasVAStart(true); } - // If we have a musttail call in a variadic funciton, we need to ensure we + // If we have a musttail call in a variadic function, we need to ensure we // forward implicit register parameters. if (const auto *CI = dyn_cast<CallInst>(I)) { if (CI->isMustTailCall() && Fn->isVarArg()) @@ -172,10 +174,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Mark values used outside their block as exported, by allocating // a virtual register for them. - if (isUsedOutsideOfDefiningBlock(I)) - if (!isa<AllocaInst>(I) || - !StaticAllocaMap.count(cast<AllocaInst>(I))) - InitializeRegForValue(I); + if (isUsedOutsideOfDefiningBlock(&*I)) + if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(I))) + InitializeRegForValue(&*I); // Collect llvm.dbg.declare information. 
This is done now instead of // during the initial isel pass through the IR so that it is done @@ -205,15 +206,36 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } // Decide the preferred extend type for a value. - PreferredExtendType[I] = getPreferredExtendForValue(I); + PreferredExtendType[&*I] = getPreferredExtendForValue(&*I); } // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This // also creates the initial PHI MachineInstrs, though none of the input // operands are populated. for (BB = Fn->begin(); BB != EB; ++BB) { - MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB); - MBBMap[BB] = MBB; + // Don't create MachineBasicBlocks for imaginary EH pad blocks. These blocks + // are really data, and no instructions can live here. + if (BB->isEHPad()) { + const Instruction *I = BB->getFirstNonPHI(); + // If this is a non-landingpad EH pad, mark this function as using + // funclets. + // FIXME: SEH catchpads do not create funclets, so we could avoid setting + // this in such cases in order to improve frame layout. + if (!isa<LandingPadInst>(I)) { + MMI.setHasEHFunclets(true); + MF->getFrameInfo()->setHasOpaqueSPAdjustment(true); + } + if (isa<CatchSwitchInst>(I)) { + assert(&*BB->begin() == I && + "WinEHPrepare failed to remove PHIs from imaginary BBs"); + continue; + } + if (isa<FuncletPadInst>(I)) + assert(&*BB->begin() == I && "WinEHPrepare failed to demote PHIs"); + } + + MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&*BB); + MBBMap[&*BB] = MBB; MF->push_back(MBB); // Transfer the address-taken flag. This is necessary because there could @@ -252,94 +274,64 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Mark landing pad blocks. SmallVector<const LandingPadInst *, 4> LPads; for (BB = Fn->begin(); BB != EB; ++BB) { - if (const auto *Invoke = dyn_cast<InvokeInst>(BB->getTerminator())) - MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); - if (BB->isLandingPad()) - LPads.push_back(BB->getLandingPadInst()); + const Instruction *FNP = BB->getFirstNonPHI(); + if (BB->isEHPad() && MBBMap.count(&*BB)) + MBBMap[&*BB]->setIsEHPad(); + if (const auto *LPI = dyn_cast<LandingPadInst>(FNP)) + LPads.push_back(LPI); } - // If this is an MSVC EH personality, we need to do a bit more work. - EHPersonality Personality = EHPersonality::Unknown; - if (Fn->hasPersonalityFn()) - Personality = classifyEHPersonality(Fn->getPersonalityFn()); - if (!isMSVCEHPersonality(Personality)) + // If this personality uses funclets, we need to do a bit more work. + if (!Fn->hasPersonalityFn()) + return; + EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn()); + if (!isFuncletEHPersonality(Personality)) return; - if (Personality == EHPersonality::MSVC_Win64SEH || - Personality == EHPersonality::MSVC_X86SEH) { - addSEHHandlersForLPads(LPads); - } - - WinEHFuncInfo &EHInfo = MMI.getWinEHFuncInfo(&fn); - if (Personality == EHPersonality::MSVC_CXX) { - const Function *WinEHParentFn = MMI.getWinEHParent(&fn); - calculateWinCXXEHStateNumbers(WinEHParentFn, EHInfo); - } - - // Copy the state numbers to LandingPadInfo for the current function, which - // could be a handler or the parent. This should happen for 32-bit SEH and - // C++ EH. 
- if (Personality == EHPersonality::MSVC_CXX || - Personality == EHPersonality::MSVC_X86SEH) { - for (const LandingPadInst *LP : LPads) { - MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()]; - MMI.addWinEHState(LPadMBB, EHInfo.LandingPadStateMap[LP]); - } - } -} - -void FunctionLoweringInfo::addSEHHandlersForLPads( - ArrayRef<const LandingPadInst *> LPads) { - MachineModuleInfo &MMI = MF->getMMI(); - - // Iterate over all landing pads with llvm.eh.actions calls. - for (const LandingPadInst *LP : LPads) { - const IntrinsicInst *ActionsCall = - dyn_cast<IntrinsicInst>(LP->getNextNode()); - if (!ActionsCall || - ActionsCall->getIntrinsicID() != Intrinsic::eh_actions) - continue; - - // Parse the llvm.eh.actions call we found. - MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()]; - SmallVector<std::unique_ptr<ActionHandler>, 4> Actions; - parseEHActions(ActionsCall, Actions); - - // Iterate EH actions from most to least precedence, which means - // iterating in reverse. - for (auto I = Actions.rbegin(), E = Actions.rend(); I != E; ++I) { - ActionHandler *Action = I->get(); - if (auto *CH = dyn_cast<CatchHandler>(Action)) { - const auto *Filter = - dyn_cast<Function>(CH->getSelector()->stripPointerCasts()); - assert((Filter || CH->getSelector()->isNullValue()) && - "expected function or catch-all"); - const auto *RecoverBA = - cast<BlockAddress>(CH->getHandlerBlockOrFunc()); - MMI.addSEHCatchHandler(LPadMBB, Filter, RecoverBA); + // Calculate state numbers if we haven't already. + WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo(); + if (Personality == EHPersonality::MSVC_CXX) + calculateWinCXXEHStateNumbers(&fn, EHInfo); + else if (isAsynchronousEHPersonality(Personality)) + calculateSEHStateNumbers(&fn, EHInfo); + else if (Personality == EHPersonality::CoreCLR) + calculateClrEHStateNumbers(&fn, EHInfo); + + calculateCatchReturnSuccessorColors(&fn, EHInfo); + + // Map all BB references in the WinEH data to MBBs. + for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { + for (WinEHHandlerType &H : TBME.HandlerArray) { + if (H.CatchObj.Alloca) { + assert(StaticAllocaMap.count(H.CatchObj.Alloca)); + H.CatchObj.FrameIndex = StaticAllocaMap[H.CatchObj.Alloca]; } else { - assert(isa<CleanupHandler>(Action)); - const auto *Fini = cast<Function>(Action->getHandlerBlockOrFunc()); - MMI.addSEHCleanupHandler(LPadMBB, Fini); + H.CatchObj.FrameIndex = INT_MAX; } + if (H.Handler) + H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()]; } } + for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap) + if (UME.Cleanup) + UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()]; + for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) { + const BasicBlock *BB = UME.Handler.get<const BasicBlock *>(); + UME.Handler = MBBMap[BB]; + } + for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) { + const BasicBlock *BB = CME.Handler.get<const BasicBlock *>(); + CME.Handler = MBBMap[BB]; + } } /// clear - Clear out all the function-specific state. This returns this /// FunctionLoweringInfo to an empty state, ready to be used for a /// different function. 
void FunctionLoweringInfo::clear() { - assert(CatchInfoFound.size() == CatchInfoLost.size() && - "Not all catch info was assigned to a landing pad!"); - MBBMap.clear(); ValueMap.clear(); StaticAllocaMap.clear(); -#ifndef NDEBUG - CatchInfoLost.clear(); - CatchInfoFound.clear(); -#endif LiveOutRegInfo.clear(); VisitedBBs.clear(); ArgDbgValues.clear(); @@ -520,6 +512,17 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { return 0; } +unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg( + const Value *CPI, const TargetRegisterClass *RC) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + auto I = CatchPadExceptionPointers.insert({CPI, 0}); + unsigned &VReg = I.first->second; + if (I.second) + VReg = MRI.createVirtualRegister(RC); + assert(VReg && "null vreg in exception pointer table!"); + return VReg; +} + /// ComputeUsesVAFloatArgument - Determine if any floating-point values are /// being passed to this variadic function, and set the MachineModuleInfo's /// usesVAFloatArgument flag if so. This flag is used to emit an undefined @@ -547,10 +550,9 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I, /// landingpad instruction and add them to the specified machine module info. void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, MachineBasicBlock *MBB) { - MMI.addPersonality( - MBB, - cast<Function>( - I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts())); + if (const auto *PF = dyn_cast<Function>( + I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts())) + MMI.addPersonality(PF); if (I.isCleanup()) MMI.addCleanup(MBB); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 5ec10308dc28..a1e2d410ab00 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -139,7 +139,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, UseRC = RC; else if (RC) { const TargetRegisterClass *ComRC = - TRI->getCommonSubClass(UseRC, RC); + TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy); // If multiple uses expect disjoint register classes, we emit // copies in AddRegisterOperand. if (ComRC) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index fbc8f1e89f6e..f46767f6c4a1 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -39,6 +39,10 @@ using namespace llvm; #define DEBUG_TYPE "legalizedag" +namespace { + +struct FloatSignAsInt; + //===----------------------------------------------------------------------===// /// This takes an arbitrary SelectionDAG as input and /// hacks on it until the target machine can handle it. This involves @@ -51,7 +55,6 @@ using namespace llvm; /// 'setcc' instruction efficiently, but does support 'brcc' instruction, this /// will attempt merge setcc and brc instructions into brcc's. 
/// -namespace { class SelectionDAGLegalize { const TargetMachine &TM; const TargetLowering &TLI; @@ -130,7 +133,11 @@ private: SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl<SDValue> &Results); - SDValue ExpandFCOPYSIGN(SDNode *Node); + void getSignAsIntValue(FloatSignAsInt &State, SDLoc DL, SDValue Value) const; + SDValue modifySignAsInt(const FloatSignAsInt &State, SDLoc DL, + SDValue NewIntValue) const; + SDValue ExpandFCOPYSIGN(SDNode *Node) const; + SDValue ExpandFABS(SDNode *Node) const; SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT, SDLoc dl); SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, @@ -138,6 +145,7 @@ private: SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, SDLoc dl); + SDValue ExpandBITREVERSE(SDValue Op, SDLoc dl); SDValue ExpandBSWAP(SDValue Op, SDLoc dl); SDValue ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl); @@ -146,10 +154,11 @@ private: SDValue ExpandVectorBuildThroughStack(SDNode* Node); SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + SDValue ExpandConstant(ConstantSDNode *CP); - std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); - - void ExpandNode(SDNode *Node); + // if ExpandNode returns false, LegalizeOp falls back to ConvertNodeToLibcall + bool ExpandNode(SDNode *Node); + void ConvertNodeToLibcall(SDNode *Node); void PromoteNode(SDNode *Node); public: @@ -273,17 +282,30 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); if (Extend) { - SDValue Result = - DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, - DAG.getEntryNode(), - CPIdx, MachinePointerInfo::getConstantPool(), - VT, false, false, false, Alignment); + SDValue Result = DAG.getExtLoad( + ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), VT, + false, false, false, Alignment); return Result; } SDValue Result = - DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, false, false, - Alignment); + DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); + return Result; +} + +/// Expands the Constant node to a load from the constant pool. +SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) { + SDLoc dl(CP); + EVT VT = CP->getValueType(0); + SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(), + TLI.getPointerTy(DAG.getDataLayout())); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + SDValue Result = + DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); return Result; } @@ -594,13 +616,13 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); // Store the vector. - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), - false, false, 0); + SDValue Ch = DAG.getStore( + DAG.getEntryNode(), dl, Tmp1, StackPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false, + false, 0); // Truncate or zero extend offset to target pointer type. - unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? 
ISD::TRUNCATE : ISD::ZERO_EXTEND; - Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3); + Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT); // Add the offset to the index. unsigned EltSize = EltVT.getSizeInBits()/8; Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3, @@ -610,9 +632,9 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT, false, false, 0); // Load the updated vector. - return DAG.getLoad(VT, dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), false, false, - false, 0); + return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), SPFI), + false, false, false, 0); } @@ -728,14 +750,12 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { case TargetLowering::Legal: { // If this is an unaligned store and the target doesn't support it, // expand it. + EVT MemVT = ST->getMemoryVT(); unsigned AS = ST->getAddressSpace(); unsigned Align = ST->getAlignment(); - if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { - Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); - if (Align < ABIAlignment) - ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); - } + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); break; } case TargetLowering::Custom: { @@ -839,20 +859,16 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); ReplaceNode(SDValue(Node, 0), Result); } else { - switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(), - StVT.getSimpleVT())) { + switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { + EVT MemVT = ST->getMemoryVT(); unsigned AS = ST->getAddressSpace(); unsigned Align = ST->getAlignment(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { - Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DL.getABITypeAlignment(Ty); - if (Align < ABIAlignment) - ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); - } + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); break; } case TargetLowering::Custom: { @@ -895,17 +911,14 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { + EVT MemVT = LD->getMemoryVT(); unsigned AS = LD->getAddressSpace(); unsigned Align = LD->getAlignment(); + const DataLayout &DL = DAG.getDataLayout(); // If this is an unaligned load and the target doesn't support it, // expand it. 
- if (!TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), AS, Align)) { - Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); - if (Align < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain); - } - } + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain); break; } case TargetLowering::Custom: { @@ -1092,23 +1105,20 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = Res.getValue(1); } } else { - // If this is an unaligned load and the target doesn't support - // it, expand it. + // If this is an unaligned load and the target doesn't support it, + // expand it. EVT MemVT = LD->getMemoryVT(); unsigned AS = LD->getAddressSpace(); unsigned Align = LD->getAlignment(); - if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) { - Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); - if (Align < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain); - } - } + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain); } break; } case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) { + EVT DestVT = Node->getValueType(0); + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) { // If the source type is not legal, see if there is a legal extload to // an intermediate type that we can then extend further. EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT()); @@ -1127,6 +1137,23 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = Load.getValue(1); break; } + + // Handle the special case of fp16 extloads. EXTLOAD doesn't have the + // normal undefined upper bits behavior to allow using an in-reg extend + // with the illegal FP type, so load as an integer and do the + // from-integer conversion. 
+ if (SrcVT.getScalarType() == MVT::f16) { + EVT ISrcVT = SrcVT.changeTypeToInteger(); + EVT IDestVT = DestVT.changeTypeToInteger(); + EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); + + SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT, + Chain, Ptr, ISrcVT, + LD->getMemOperand()); + Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result); + Chain = Result.getValue(1); + break; + } } assert(!SrcVT.isVector() && @@ -1180,15 +1207,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { #ifndef NDEBUG for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == - TargetLowering::TypeLegal && + assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Node->getValueType(i))) && "Unexpected illegal type!"); for (const SDValue &Op : Node->op_values()) - assert((TLI.getTypeAction(*DAG.getContext(), - Op.getValueType()) == TargetLowering::TypeLegal || - Op.getOpcode() == ISD::TargetConstant) && - "Unexpected illegal type!"); + assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Op.getValueType()) || + Op.getOpcode() == ISD::TargetConstant) && + "Unexpected illegal type!"); #endif // Figure out the correct action; the way to query this varies by opcode @@ -1201,6 +1230,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::STACKSAVE: Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; + case ISD::GET_DYNAMIC_AREA_OFFSET: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + break; case ISD::VAARG: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -1229,7 +1262,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::SETCC: case ISD::BR_CC: { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : - Node->getOpcode() == ISD::SETCC ? 2 : 1; + Node->getOpcode() == ISD::SETCC ? 2 : + Node->getOpcode() == ISD::SETCCE ? 3 : 1; unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = @@ -1265,6 +1299,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::FRAME_TO_ARGS_OFFSET: case ISD::EH_SJLJ_SETJMP: case ISD::EH_SJLJ_LONGJMP: + case ISD::EH_SJLJ_SETUP_DISPATCH: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -1281,6 +1316,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; + case ISD::READCYCLECOUNTER: + // READCYCLECOUNTER returns an i64, even if type legalization might have + // expanded that to several smaller types. 
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64); + break; case ISD::READ_REGISTER: case ISD::WRITE_REGISTER: // Named register is legal in the DAG, but blocked by register name @@ -1379,7 +1419,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } // FALL THROUGH case TargetLowering::Expand: - ExpandNode(Node); + if (ExpandNode(Node)) + return; + // FALL THROUGH + case TargetLowering::LibCall: + ConvertNodeToLibcall(Node); return; case TargetLowering::Promote: PromoteNode(Node); @@ -1419,6 +1463,11 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in // the vector. If all are expanded here, we don't want one store per vector // element. + + // Caches for hasPredecessorHelper + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + SDValue StackPtr, Ch; for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), UE = Vec.getNode()->use_end(); UI != UE; ++UI) { @@ -1433,6 +1482,12 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode())) continue; + // If the index is dependent on the store we will introduce a cycle when + // creating the load (the load uses the index, and by replacing the chain + // we will make the index dependent on the load). + if (Idx.getNode()->hasPredecessorHelper(ST, Visited, Worklist)) + continue; + StackPtr = ST->getBasePtr(); Ch = SDValue(ST, 0); break; @@ -1490,7 +1545,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // First store the whole vector. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, @@ -1528,7 +1584,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { SDLoc dl(Node); SDValue FIPtr = DAG.CreateStackTemporary(VT); int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // Emit a store of each element to the stack slot. SmallVector<SDValue, 8> Stores; @@ -1568,69 +1625,143 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { false, false, false, 0); } -SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { - SDLoc dl(Node); - SDValue Tmp1 = Node->getOperand(0); - SDValue Tmp2 = Node->getOperand(1); - - // Get the sign bit of the RHS. First obtain a value that has the same - // sign as the sign bit, i.e. negative if and only if the sign bit is 1. - SDValue SignBit; - EVT FloatVT = Tmp2.getValueType(); - EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits()); +namespace { +/// Keeps track of state when getting the sign of a floating-point value as an +/// integer. +struct FloatSignAsInt { + EVT FloatVT; + SDValue Chain; + SDValue FloatPtr; + SDValue IntPtr; + MachinePointerInfo IntPointerInfo; + MachinePointerInfo FloatPointerInfo; + SDValue IntValue; + APInt SignMask; +}; +} + +/// Bitcast a floating-point value to an integer value. 
Only bitcast the part +/// containing the sign bit if the target has no integer value capable of +/// holding all bits of the floating-point value. +void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, + SDLoc DL, SDValue Value) const { + EVT FloatVT = Value.getValueType(); + unsigned NumBits = FloatVT.getSizeInBits(); + State.FloatVT = FloatVT; + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); + // Convert to an integer of the same size. if (TLI.isTypeLegal(IVT)) { - // Convert to an integer with the same sign bit. - SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2); + State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value); + State.SignMask = APInt::getSignBit(NumBits); + return; + } + + auto &DataLayout = DAG.getDataLayout(); + // Store the float to memory, then load the sign part out as an integer. + MVT LoadTy = TLI.getRegisterType(*DAG.getContext(), MVT::i8); + // First create a temporary that is aligned for both the load and store. + SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); + int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + // Then store the float to it. + State.FloatPtr = StackPtr; + MachineFunction &MF = DAG.getMachineFunction(); + State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI); + State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr, + State.FloatPointerInfo, false, false, 0); + + SDValue IntPtr; + if (DataLayout.isBigEndian()) { + assert(FloatVT.isByteSized() && "Unsupported floating point type!"); + // Load out a legal integer with the same sign bit as the float. + IntPtr = StackPtr; + State.IntPointerInfo = State.FloatPointerInfo; } else { - auto &DL = DAG.getDataLayout(); - // Store the float to memory, then load the sign part out as an integer. - MVT LoadTy = TLI.getPointerTy(DL); - // First create a temporary that is aligned for both the load and store. - SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); - // Then store the float to it. - SDValue Ch = - DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(), - false, false, 0); - if (DL.isBigEndian()) { - assert(FloatVT.isByteSized() && "Unsupported floating point type!"); - // Load out a legal integer with the same sign bit as the float. - SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(), - false, false, false, 0); - } else { // Little endian - SDValue LoadPtr = StackPtr; - // The float may be wider than the integer we are going to load. Advance - // the pointer so that the loaded integer will contain the sign bit. - unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); - unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; - LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr, - DAG.getConstant(ByteOffset, dl, - LoadPtr.getValueType())); - // Load a legal integer containing the sign bit. - SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), - false, false, false, 0); - // Move the sign bit to the top bit of the loaded integer. - unsigned BitShift = LoadTy.getSizeInBits() - - (FloatVT.getSizeInBits() - 8 * ByteOffset); - assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?"); - if (BitShift) - SignBit = DAG.getNode( - ISD::SHL, dl, LoadTy, SignBit, - DAG.getConstant(BitShift, dl, - TLI.getShiftAmountTy(SignBit.getValueType(), DL))); - } + // Advance the pointer so that the loaded byte will contain the sign bit. 
+ unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1; + IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr, + DAG.getConstant(ByteOffset, DL, StackPtr.getValueType())); + State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI, + ByteOffset); } - // Now get the sign bit proper, by seeing whether the value is negative. - SignBit = DAG.getSetCC(dl, getSetCCResultType(SignBit.getValueType()), - SignBit, - DAG.getConstant(0, dl, SignBit.getValueType()), - ISD::SETLT); - // Get the absolute value of the result. - SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1); - // Select between the nabs and abs value based on the sign bit of - // the input. - return DAG.getSelect(dl, AbsVal.getValueType(), SignBit, - DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), - AbsVal); + + State.IntPtr = IntPtr; + State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, + IntPtr, State.IntPointerInfo, MVT::i8, + false, false, false, 0); + State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7); +} + +/// Replace the integer value produced by getSignAsIntValue() with a new value +/// and cast the result back to a floating-point type. +SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State, + SDLoc DL, SDValue NewIntValue) const { + if (!State.Chain) + return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue); + + // Override the part containing the sign bit in the value stored on the stack. + SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr, + State.IntPointerInfo, MVT::i8, false, false, + 0); + return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr, + State.FloatPointerInfo, false, false, false, 0); +} + +SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { + SDLoc DL(Node); + SDValue Mag = Node->getOperand(0); + SDValue Sign = Node->getOperand(1); + + // Get sign bit into an integer value. + FloatSignAsInt SignAsInt; + getSignAsIntValue(SignAsInt, DL, Sign); + + EVT IntVT = SignAsInt.IntValue.getValueType(); + SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT); + SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, SignAsInt.IntValue, + SignMask); + + // If FABS is legal transform FCOPYSIGN(x, y) => sign(x) ? -FABS(x) : FABS(X) + EVT FloatVT = Mag.getValueType(); + if (TLI.isOperationLegalOrCustom(ISD::FABS, FloatVT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, FloatVT)) { + SDValue AbsValue = DAG.getNode(ISD::FABS, DL, FloatVT, Mag); + SDValue NegValue = DAG.getNode(ISD::FNEG, DL, FloatVT, AbsValue); + SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(IntVT), SignBit, + DAG.getConstant(0, DL, IntVT), ISD::SETNE); + return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue); + } + + // Transform values to integer, copy the sign bit and transform back. + FloatSignAsInt MagAsInt; + getSignAsIntValue(MagAsInt, DL, Mag); + assert(SignAsInt.SignMask == MagAsInt.SignMask); + SDValue ClearSignMask = DAG.getConstant(~SignAsInt.SignMask, DL, IntVT); + SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, MagAsInt.IntValue, + ClearSignMask); + SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit); + + return modifySignAsInt(MagAsInt, DL, CopiedSign); +} + +SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const { + SDLoc DL(Node); + SDValue Value = Node->getOperand(0); + + // Transform FABS(x) => FCOPYSIGN(x, 0.0) if FCOPYSIGN is legal. 
+ EVT FloatVT = Value.getValueType(); + if (TLI.isOperationLegalOrCustom(ISD::FCOPYSIGN, FloatVT)) { + SDValue Zero = DAG.getConstantFP(0.0, DL, FloatVT); + return DAG.getNode(ISD::FCOPYSIGN, DL, FloatVT, Value, Zero); + } + + // Transform value to integer, clear the sign bit and transform back. + FloatSignAsInt ValueAsInt; + getSignAsIntValue(ValueAsInt, DL, Value); + EVT IntVT = ValueAsInt.IntValue.getValueType(); + SDValue ClearSignMask = DAG.getConstant(~ValueAsInt.SignMask, DL, IntVT); + SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, ValueAsInt.IntValue, + ClearSignMask); + return modifySignAsInt(ValueAsInt, DL, ClearedSign); } void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, @@ -1798,7 +1929,8 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr); int SPFI = StackPtrFI->getIndex(); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); unsigned SrcSize = SrcOp.getValueType().getSizeInBits(); unsigned SlotSize = SlotVT.getSizeInBits(); @@ -1838,14 +1970,14 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr); int SPFI = StackPtrFI->getIndex(); - SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0), - StackPtr, - MachinePointerInfo::getFixedStack(SPFI), - Node->getValueType(0).getVectorElementType(), - false, false, 0); - return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), - false, false, false, 0); + SDValue Ch = DAG.getTruncStore( + DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), + Node->getValueType(0).getVectorElementType(), false, false, 0); + return DAG.getLoad( + Node->getValueType(0), dl, Ch, StackPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false, + false, false, 0); } static bool @@ -2011,9 +2143,10 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); - return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); + return DAG.getLoad( + VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, Alignment); } SmallSet<SDValue, 16> DefinedValues; @@ -2205,47 +2338,6 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, return ExpandLibCall(LC, Node, isSigned); } -/// Return true if divmod libcall is available. -static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, - const TargetLowering &TLI) { - RTLIB::Libcall LC; - switch (Node->getSimpleValueType(0).SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); - case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; - case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; - case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; - case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; - case MVT::i128: LC= isSigned ? 
RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; - } - - return TLI.getLibcallName(LC) != nullptr; -} - -/// Only issue divrem libcall if both quotient and remainder are needed. -static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) { - // The other use might have been replaced with a divrem already. - unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; - unsigned OtherOpcode = 0; - if (isSigned) - OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV; - else - OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV; - - SDValue Op0 = Node->getOperand(0); - SDValue Op1 = Node->getOperand(1); - for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), - UE = Op0.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - if (User == Node) - continue; - if ((User->getOpcode() == OtherOpcode || User->getOpcode() == DivRemOpc) && - User->getOperand(0) == Op0 && - User->getOperand(1) == Op1) - return true; - } - return false; -} - /// Issue libcalls to __{u}divmod to compute div / rem pairs. void SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, @@ -2428,6 +2520,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, SDLoc dl) { + // TODO: Should any fast-math-flags be set for the created nodes? + if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { // simple 32-bit [signed|unsigned] integer to float/double expansion @@ -2611,14 +2705,15 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, Alignment = std::min(Alignment, 4u); SDValue FudgeInReg; if (DestVT == MVT::f32) - FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); + FudgeInReg = DAG.getLoad( + MVT::f32, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, Alignment); else { - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, - DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, false, Alignment); + SDValue Load = DAG.getExtLoad( + ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, + false, false, false, Alignment); HandleSDNode Handle(Load); LegalizeOp(Load.getNode()); FudgeInReg = Handle.getValue(); @@ -2713,6 +2808,31 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); } +/// Open code the operations for BITREVERSE. +SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, SDLoc dl) { + EVT VT = Op.getValueType(); + EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + unsigned Sz = VT.getScalarSizeInBits(); + + SDValue Tmp, Tmp2; + Tmp = DAG.getConstant(0, dl, VT); + for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) { + if (I < J) + Tmp2 = + DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT)); + else + Tmp2 = + DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT)); + + APInt Shift(Sz, 1); + Shift = Shift.shl(J); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2); + } + + return Tmp; +} + /// Open code the operations for BSWAP of the specified operation. 
SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { EVT VT = Op.getValueType(); @@ -2865,16 +2985,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, } } -std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { - unsigned Opc = Node->getOpcode(); - MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); - - return ExpandChainLibCall(LC, Node, false); -} - -void SelectionDAGLegalize::ExpandNode(SDNode *Node) { +bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SmallVector<SDValue, 8> Results; SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; @@ -2888,6 +2999,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); Results.push_back(Tmp1); break; + case ISD::BITREVERSE: + Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl)); + break; case ISD::BSWAP: Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); break; @@ -2908,30 +3022,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // preserve the chain and be done. Results.push_back(Node->getOperand(0)); break; + case ISD::READCYCLECOUNTER: + // If the target didn't expand this, just return 'zero' and preserve the + // chain. + Results.append(Node->getNumValues() - 1, + DAG.getConstant(0, dl, Node->getValueType(0))); + Results.push_back(Node->getOperand(0)); + break; case ISD::EH_SJLJ_SETJMP: // If the target didn't expand this, just return 'zero' and preserve the // chain. Results.push_back(DAG.getConstant(0, dl, MVT::i32)); Results.push_back(Node->getOperand(0)); break; - case ISD::ATOMIC_FENCE: { - // If the target didn't lower this, lower it to '__sync_synchronize()' call - // FIXME: handle "fence singlethread" more efficiently. - TargetLowering::ArgListTy Args; - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl) - .setChain(Node->getOperand(0)) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("__sync_synchronize", - TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args), 0); - - std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); - - Results.push_back(CallResult.second); - break; - } case ISD::ATOMIC_LOAD: { // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0)); @@ -2959,26 +3062,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Swap.getValue(1)); break; } - // By default, atomic intrinsics are marked Legal and lowered. Targets - // which don't support them directly, however, may want libcalls, in which - // case they mark them Expand, and we get here. - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - case ISD::ATOMIC_CMP_SWAP: { - std::pair<SDValue, SDValue> Tmp = ExpandAtomic(Node); - Results.push_back(Tmp.first); - Results.push_back(Tmp.second); - break; - } case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and // splits out the success value as a comparison. 
Expanding the resulting @@ -3017,21 +3100,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } break; } - case ISD::TRAP: { - // If this operation is not supported, lower it to 'abort()' call - TargetLowering::ArgListTy Args; - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl) - .setChain(Node->getOperand(0)) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("abort", - TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args), 0); - std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); - - Results.push_back(CallResult.second); - break; - } case ISD::FP_ROUND: case ISD::BITCAST: Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), @@ -3097,6 +3165,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(0), Tmp1, ISD::SETLT); True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0)); + // TODO: Should any fast-math-flags be set for the FSUB? False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, DAG.getNode(ISD::FSUB, dl, VT, Node->getOperand(0), Tmp1)); @@ -3106,57 +3175,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } - case ISD::VAARG: { - const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); - EVT VT = Node->getValueType(0); - Tmp1 = Node->getOperand(0); - Tmp2 = Node->getOperand(1); - unsigned Align = Node->getConstantOperandVal(3); - - SDValue VAListLoad = - DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, Tmp1, Tmp2, - MachinePointerInfo(V), false, false, false, 0); - SDValue VAList = VAListLoad; - - if (Align > TLI.getMinStackArgumentAlignment()) { - assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); - - VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - DAG.getConstant(Align - 1, dl, - VAList.getValueType())); - - VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList, - DAG.getConstant(-(int64_t)Align, dl, - VAList.getValueType())); - } - - // Increment the pointer, VAList, to the next vaarg - Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - DAG.getConstant(DAG.getDataLayout().getTypeAllocSize( - VT.getTypeForEVT(*DAG.getContext())), - dl, VAList.getValueType())); - // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, - MachinePointerInfo(V), false, false, 0); - // Load the actual argument out of the pointer VAList - Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), - false, false, false, 0)); + case ISD::VAARG: + Results.push_back(DAG.expandVAArg(Node)); Results.push_back(Results[0].getValue(1)); break; - } - case ISD::VACOPY: { - // This defaults to loading a pointer from the input and storing it to the - // output, returning the chain. - const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); - const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); - Tmp1 = DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, - Node->getOperand(0), Node->getOperand(2), - MachinePointerInfo(VS), false, false, false, 0); - Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), - MachinePointerInfo(VD), false, false, 0); - Results.push_back(Tmp1); + case ISD::VACOPY: + Results.push_back(DAG.expandVACopy(Node)); break; - } case ISD::EXTRACT_VECTOR_ELT: if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) // This must be an access of the only element. Return it. 
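Aside: the ExpandBITREVERSE helper added in this file open-codes bit reversal one bit at a time, shifting source bit I to position Sz-1-I, masking it, and OR-ing it into an accumulator. A scalar analogue is sketched below; it is illustrative only, and reverseBits32 is a hypothetical helper rather than anything in the patch.

#include <cstdint>

uint32_t reverseBits32(uint32_t V) {
  const unsigned Sz = 32;
  uint32_t R = 0;
  for (unsigned I = 0, J = Sz - 1; I < Sz; ++I, --J) {
    // Move bit I of V to bit J of the result, then isolate and accumulate it.
    uint32_t Moved = (I < J) ? (V << (J - I)) : (V >> (I - J));
    R |= Moved & (UINT32_C(1) << J);
  }
  return R;
}

This per-bit sequence (a shift, a mask, and an OR for every bit) is the generic fallback a target gets when it marks ISD::BITREVERSE as Expand without providing anything better.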
@@ -3302,28 +3327,24 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Node->getOperand(0)); } break; + case ISD::GET_DYNAMIC_AREA_OFFSET: + Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0))); + Results.push_back(Results[0].getValue(0)); + break; case ISD::FCOPYSIGN: Results.push_back(ExpandFCOPYSIGN(Node)); break; case ISD::FNEG: // Expand Y = FNEG(X) -> Y = SUB -0.0, X Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0)); + // TODO: If FNEG has fast-math-flags, propagate them to the FSUB. Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, Node->getOperand(0)); Results.push_back(Tmp1); break; - case ISD::FABS: { - // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). - EVT VT = Node->getValueType(0); - Tmp1 = Node->getOperand(0); - Tmp2 = DAG.getConstantFP(0.0, dl, VT); - Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()), - Tmp1, Tmp2, ISD::SETUGT); - Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1); - Tmp1 = DAG.getSelect(dl, VT, Tmp2, Tmp1, Tmp3); - Results.push_back(Tmp1); + case ISD::FABS: + Results.push_back(ExpandFABS(Node)); break; - } case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -3344,25 +3365,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } - case ISD::FMINNUM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, - RTLIB::FMIN_F80, RTLIB::FMIN_F128, - RTLIB::FMIN_PPCF128)); - break; - case ISD::FMAXNUM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, - RTLIB::FMAX_F80, RTLIB::FMAX_F128, - RTLIB::FMAX_PPCF128)); - break; - case ISD::FSQRT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128)); - break; case ISD::FSIN: case ISD::FCOS: { EVT VT = Node->getValueType(0); - bool isSIN = Node->getOpcode() == ISD::FSIN; // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / // fcos which share the same operand and both are used. if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || @@ -3370,137 +3375,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { && useSinCos(Node)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); - if (!isSIN) + if (Node->getOpcode() == ISD::FCOS) Tmp1 = Tmp1.getValue(1); Results.push_back(Tmp1); - } else if (isSIN) { - Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_F128, - RTLIB::SIN_PPCF128)); - } else { - Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_F128, - RTLIB::COS_PPCF128)); } break; } - case ISD::FSINCOS: - // Expand into sincos libcall. 
- ExpandSinCosLibCall(Node, Results); - break; - case ISD::FLOG: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_F128, - RTLIB::LOG_PPCF128)); - break; - case ISD::FLOG2: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128)); - break; - case ISD::FLOG10: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128)); - break; - case ISD::FEXP: - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_F128, - RTLIB::EXP_PPCF128)); - break; - case ISD::FEXP2: - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128)); - break; - case ISD::FTRUNC: - Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, - RTLIB::TRUNC_PPCF128)); - break; - case ISD::FFLOOR: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, - RTLIB::FLOOR_PPCF128)); - break; - case ISD::FCEIL: - Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, - RTLIB::CEIL_F80, RTLIB::CEIL_F128, - RTLIB::CEIL_PPCF128)); - break; - case ISD::FRINT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, - RTLIB::RINT_F80, RTLIB::RINT_F128, - RTLIB::RINT_PPCF128)); - break; - case ISD::FNEARBYINT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_F128, - RTLIB::NEARBYINT_PPCF128)); - break; - case ISD::FROUND: - Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, - RTLIB::ROUND_F64, - RTLIB::ROUND_F80, - RTLIB::ROUND_F128, - RTLIB::ROUND_PPCF128)); - break; - case ISD::FPOWI: - Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_F128, - RTLIB::POWI_PPCF128)); - break; - case ISD::FPOW: - Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_F128, - RTLIB::POW_PPCF128)); - break; - case ISD::FDIV: - Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, - RTLIB::DIV_F80, RTLIB::DIV_F128, - RTLIB::DIV_PPCF128)); - break; - case ISD::FREM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, - RTLIB::REM_F80, RTLIB::REM_F128, - RTLIB::REM_PPCF128)); - break; - case ISD::FMA: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, - RTLIB::FMA_F80, RTLIB::FMA_F128, - RTLIB::FMA_PPCF128)); - break; case ISD::FMAD: llvm_unreachable("Illegal fmad should never be formed"); - case ISD::FADD: - Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, - RTLIB::ADD_F80, RTLIB::ADD_F128, - RTLIB::ADD_PPCF128)); - break; - case ISD::FMUL: - Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, - RTLIB::MUL_F80, RTLIB::MUL_F128, - RTLIB::MUL_PPCF128)); - break; - case ISD::FP16_TO_FP: { - if (Node->getValueType(0) == MVT::f32) { - Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); - break; + case ISD::FP16_TO_FP: + if (Node->getValueType(0) != MVT::f32) { + // We can extend to types bigger than f32 in two steps without changing + // the result. 
Since "f16 -> f32" is much more commonly available, give + // CodeGen the option of emitting that before resorting to a libcall. + SDValue Res = + DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0)); + Results.push_back( + DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); } - - // We can extend to types bigger than f32 in two steps without changing the - // result. Since "f16 -> f32" is much more commonly available, give CodeGen - // the option of emitting that before resorting to a libcall. - SDValue Res = - DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0)); - Results.push_back( - DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); break; - } - case ISD::FP_TO_FP16: { + case ISD::FP_TO_FP16: if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { SDValue Op = Node->getOperand(0); MVT SVT = Op.getSimpleValueType(); @@ -3512,16 +3407,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { DAG.getIntPtrConstant(0, dl)); Results.push_back( DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal)); - break; } } - - RTLIB::Libcall LC = - RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); - Results.push_back(ExpandLibCall(LC, Node, false)); break; - } case ISD::ConstantFP: { ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node); // Check to see if this FP immediate is already legal. @@ -3530,17 +3418,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(ExpandConstantFP(CFP, true)); break; } + case ISD::Constant: { + ConstantSDNode *CP = cast<ConstantSDNode>(Node); + Results.push_back(ExpandConstant(CP)); + break; + } case ISD::FSUB: { EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) { + const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(Node)->Flags; Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); - Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags); Results.push_back(Tmp1); - } else { - Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, - RTLIB::SUB_F80, RTLIB::SUB_F128, - RTLIB::SUB_PPCF128)); } break; } @@ -3564,29 +3454,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM : ISD::UDIVREM; Tmp2 = Node->getOperand(0); Tmp3 = Node->getOperand(1); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || - (isDivRemLibcallAvailable(Node, isSigned, TLI) && - // If div is legal, it's better to do the normal expansion - !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) && - useDivRem(Node, isSigned, false))) { + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); + Results.push_back(Tmp1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); - } else if (isSigned) - Tmp1 = ExpandIntLibCall(Node, true, - RTLIB::SREM_I8, - RTLIB::SREM_I16, RTLIB::SREM_I32, - RTLIB::SREM_I64, RTLIB::SREM_I128); - else - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::UREM_I8, - RTLIB::UREM_I16, RTLIB::UREM_I32, - RTLIB::UREM_I64, RTLIB::UREM_I128); - Results.push_back(Tmp1); + Results.push_back(Tmp1); + } break; } case ISD::UDIV: @@ -3594,23 +3472,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { bool isSigned = Node->getOpcode() == ISD::SDIV; unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; EVT VT = Node->getValueType(0); - SDVTList VTs = DAG.getVTList(VT, VT); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || - (isDivRemLibcallAvailable(Node, isSigned, TLI) && - useDivRem(Node, isSigned, true))) + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { + SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), Node->getOperand(1)); - else if (isSigned) - Tmp1 = ExpandIntLibCall(Node, true, - RTLIB::SDIV_I8, - RTLIB::SDIV_I16, RTLIB::SDIV_I32, - RTLIB::SDIV_I64, RTLIB::SDIV_I128); - else - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::UDIV_I8, - RTLIB::UDIV_I16, RTLIB::UDIV_I32, - RTLIB::UDIV_I64, RTLIB::UDIV_I128); - Results.push_back(Tmp1); + Results.push_back(Tmp1); + } break; } case ISD::MULHU: @@ -3626,11 +3493,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1.getValue(1)); break; } - case ISD::SDIVREM: - case ISD::UDIVREM: - // Expand into divrem libcall - ExpandDivRemLibCall(Node, Results); - break; case ISD::MUL: { EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); @@ -3673,14 +3535,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.getShiftAmountTy(HalfType, DAG.getDataLayout())); Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); - break; } - - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::MUL_I8, - RTLIB::MUL_I16, RTLIB::MUL_I32, - RTLIB::MUL_I64, RTLIB::MUL_I128); - Results.push_back(Tmp1); break; } case ISD::SADDO: @@ -3867,9 +3722,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); - SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, - MachinePointerInfo::getJumpTable(), MemVT, - false, false, false, 0); + SDValue LD = DAG.getExtLoad( + ISD::SEXTLOAD, dl, PTy, Chain, Addr, + MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT, + false, false, false, 0); Addr = LD; if (TM.getRelocationModel() == Reloc::PIC_) { // For PIC, the sequence is: @@ -4092,16 +3948,276 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } // Replace the original node with the legalized result. 
+ if (Results.empty()) + return false; + + ReplaceNode(Node, Results.data()); + return true; +} + +void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { + SmallVector<SDValue, 8> Results; + SDLoc dl(Node); + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + unsigned Opc = Node->getOpcode(); + switch (Opc) { + case ISD::ATOMIC_FENCE: { + // If the target didn't lower this, lower it to '__sync_synchronize()' call + // FIXME: handle "fence singlethread" more efficiently. + TargetLowering::ArgListTy Args; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + + Results.push_back(CallResult.second); + break; + } + // By default, atomic intrinsics are marked Legal and lowered. Targets + // which don't support them directly, however, may want libcalls, in which + // case they mark them Expand, and we get here. + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_CMP_SWAP: { + MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); + + std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + break; + } + case ISD::TRAP: { + // If this operation is not supported, lower it to 'abort()' call + TargetLowering::ArgListTy Args; + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("abort", + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + + Results.push_back(CallResult.second); + break; + } + case ISD::FMINNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128)); + break; + case ISD::FMAXNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128)); + break; + case ISD::FSQRT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128)); + break; + case ISD::FSIN: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128)); + break; + case ISD::FCOS: + Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128)); + break; + case ISD::FSINCOS: + // Expand into sincos libcall. 
+ ExpandSinCosLibCall(Node, Results); + break; + case ISD::FLOG: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128)); + break; + case ISD::FLOG2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128)); + break; + case ISD::FLOG10: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128)); + break; + case ISD::FEXP: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128)); + break; + case ISD::FEXP2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128)); + break; + case ISD::FTRUNC: + Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128)); + break; + case ISD::FFLOOR: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128)); + break; + case ISD::FCEIL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128)); + break; + case ISD::FRINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128)); + break; + case ISD::FNEARBYINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128)); + break; + case ISD::FROUND: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128)); + break; + case ISD::FPOWI: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128)); + break; + case ISD::FPOW: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128)); + break; + case ISD::FDIV: + Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, + RTLIB::DIV_F80, RTLIB::DIV_F128, + RTLIB::DIV_PPCF128)); + break; + case ISD::FREM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128)); + break; + case ISD::FMA: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_F128, + RTLIB::FMA_PPCF128)); + break; + case ISD::FADD: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128)); + break; + case ISD::FMUL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, + RTLIB::MUL_F80, RTLIB::MUL_F128, + RTLIB::MUL_PPCF128)); + break; + case ISD::FP16_TO_FP: + if (Node->getValueType(0) == MVT::f32) { + Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); + } + break; + case ISD::FP_TO_FP16: { + RTLIB::Libcall LC = + RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); + Results.push_back(ExpandLibCall(LC, Node, false)); + break; + } + case ISD::FSUB: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, + 
RTLIB::SUB_F80, RTLIB::SUB_F128, + RTLIB::SUB_PPCF128)); + break; + case ISD::SREM: + Results.push_back(ExpandIntLibCall(Node, true, + RTLIB::SREM_I8, + RTLIB::SREM_I16, RTLIB::SREM_I32, + RTLIB::SREM_I64, RTLIB::SREM_I128)); + break; + case ISD::UREM: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::UREM_I8, + RTLIB::UREM_I16, RTLIB::UREM_I32, + RTLIB::UREM_I64, RTLIB::UREM_I128)); + break; + case ISD::SDIV: + Results.push_back(ExpandIntLibCall(Node, true, + RTLIB::SDIV_I8, + RTLIB::SDIV_I16, RTLIB::SDIV_I32, + RTLIB::SDIV_I64, RTLIB::SDIV_I128)); + break; + case ISD::UDIV: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::UDIV_I8, + RTLIB::UDIV_I16, RTLIB::UDIV_I32, + RTLIB::UDIV_I64, RTLIB::UDIV_I128)); + break; + case ISD::SDIVREM: + case ISD::UDIVREM: + // Expand into divrem libcall + ExpandDivRemLibCall(Node, Results); + break; + case ISD::MUL: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::MUL_I8, + RTLIB::MUL_I16, RTLIB::MUL_I32, + RTLIB::MUL_I64, RTLIB::MUL_I128)); + break; + } + + // Replace the original node with the legalized result. if (!Results.empty()) ReplaceNode(Node, Results.data()); } +// Determine the vector type to use in place of an original scalar element when +// promoting equally sized vectors. +static MVT getPromotedVectorElementType(const TargetLowering &TLI, + MVT EltVT, MVT NewEltVT) { + unsigned OldEltsPerNewElt = EltVT.getSizeInBits() / NewEltVT.getSizeInBits(); + MVT MidVT = MVT::getVectorVT(NewEltVT, OldEltsPerNewElt); + assert(TLI.isTypeLegal(MidVT) && "unexpected"); + return MidVT; +} + void SelectionDAGLegalize::PromoteNode(SDNode *Node) { SmallVector<SDValue, 8> Results; MVT OVT = Node->getSimpleValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || Node->getOpcode() == ISD::SINT_TO_FP || - Node->getOpcode() == ISD::SETCC) { + Node->getOpcode() == ISD::SETCC || + Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Node->getOpcode() == ISD::INSERT_VECTOR_ELT) { OVT = Node->getOperand(0).getSimpleValueType(); } if (Node->getOpcode() == ISD::BR_CC) @@ -4284,11 +4400,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FREM: case ISD::FMINNUM: case ISD::FMAXNUM: - case ISD::FCOPYSIGN: case ISD::FPOW: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); - Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, + Node->getFlags()); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, DAG.getIntPtrConstant(0, dl))); break; @@ -4303,12 +4419,20 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { DAG.getIntPtrConstant(0, dl))); break; } + case ISD::FCOPYSIGN: case ISD::FPOWI: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = Node->getOperand(1); Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + + // fcopysign doesn't change anything but the sign bit, so + // (fp_round (fcopysign (fpext a), b)) + // is as precise as + // (fp_round (fpext a)) + // which is a no-op. Mark it as a TRUNCating FP_ROUND. 
+ const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp3, DAG.getIntPtrConstant(0, dl))); + Tmp3, DAG.getIntPtrConstant(isTrunc, dl))); break; } case ISD::FFLOOR: @@ -4333,6 +4457,157 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp2, DAG.getIntPtrConstant(0, dl))); break; } + case ISD::BUILD_VECTOR: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size + // + // e.g. v2i64 = build_vector i64:x, i64:y => v4i32 + // => + // v4i32 = concat_vectors (v2i32 (bitcast i64:x)), (v2i32 (bitcast i64:y)) + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for build_vector"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + + SmallVector<SDValue, 8> NewOps; + for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) { + SDValue Op = Node->getOperand(I); + NewOps.push_back(DAG.getNode(ISD::BITCAST, SDLoc(Op), MidVT, Op)); + } + + SDLoc SL(Node); + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewOps); + SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat); + Results.push_back(CvtVec); + break; + } + case ISD::EXTRACT_VECTOR_ELT: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size. + // + // e.g. v2i64 = extract_vector_elt x:v2i64, y:i32 + // => + // v4i32:castx = bitcast x:v2i64 + // + // i64 = bitcast + // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))), + // (i32 (extract_vector_elt castx, (2 * y + 1))) + // + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for extract_vector_elt"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + unsigned NewEltsPerOldElt = MidVT.getVectorNumElements(); + + SDValue Idx = Node->getOperand(1); + EVT IdxVT = Idx.getValueType(); + SDLoc SL(Node); + SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SL, IdxVT); + SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor); + + SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0)); + + SmallVector<SDValue, 8> NewOps; + for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { + SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT); + SDValue TmpIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset); + + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT, + CastVec, TmpIdx); + NewOps.push_back(Elt); + } + + SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, SL, MidVT, NewOps); + + Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec)); + break; + } + case ISD::INSERT_VECTOR_ELT: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size + // + // e.g. 
v2i64 = insert_vector_elt x:v2i64, y:i64, z:i32 + // => + // v4i32:castx = bitcast x:v2i64 + // v2i32:casty = bitcast y:i64 + // + // v2i64 = bitcast + // (v4i32 insert_vector_elt + // (v4i32 insert_vector_elt v4i32:castx, + // (extract_vector_elt casty, 0), 2 * z), + // (extract_vector_elt casty, 1), (2 * z + 1)) + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for insert_vector_elt"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + unsigned NewEltsPerOldElt = MidVT.getVectorNumElements(); + + SDValue Val = Node->getOperand(1); + SDValue Idx = Node->getOperand(2); + EVT IdxVT = Idx.getValueType(); + SDLoc SL(Node); + + SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SDLoc(), IdxVT); + SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor); + + SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0)); + SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val); + + SDValue NewVec = CastVec; + for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { + SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT); + SDValue InEltIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset); + + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT, + CastVal, IdxOffset); + + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, NVT, + NewVec, Elt, InEltIdx); + } + + Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewVec)); + break; + } + case ISD::SCALAR_TO_VECTOR: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size. + // + // e.g. v2i64 = scalar_to_vector x:i64 + // => + // concat_vectors (v2i32 bitcast x:i64), (v2i32 undef) + // + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + SDValue Val = Node->getOperand(0); + SDLoc SL(Node); + + SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val); + SDValue Undef = DAG.getUNDEF(MidVT); + + SmallVector<SDValue, 8> NewElts; + NewElts.push_back(CastVal); + for (unsigned I = 1, NElts = OVT.getVectorNumElements(); I != NElts; ++I) + NewElts.push_back(Undef); + + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewElts); + SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat); + Results.push_back(CvtVec); + break; + } + } // Replace the original node with the legalized result. @@ -4356,7 +4631,7 @@ void SelectionDAG::Legalize() { for (auto NI = allnodes_end(); NI != allnodes_begin();) { --NI; - SDNode *N = NI; + SDNode *N = &*NI; if (N->use_empty() && N != getRoot().getNode()) { ++NI; DeleteNode(N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 3c50a4155731..6c0193a76732 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -43,10 +43,10 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, } //===----------------------------------------------------------------------===// -// Result Float to Integer Conversion. +// Convert Float Results to Integer for Non-HW-supported Operations. 
//===----------------------------------------------------------------------===// -void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { +bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue R = SDValue(); @@ -59,20 +59,26 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to soften the result of this operator!"); + case ISD::Register: + case ISD::CopyFromReg: + case ISD::CopyToReg: + assert(isLegalInHWReg(N->getValueType(ResNo)) && + "Unsupported SoftenFloatRes opcode!"); + // Only when isLegalInHWReg, we can skip check of the operands. + R = SDValue(N, ResNo); + break; case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; - case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; - case ISD::ConstantFP: - R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N)); - break; + case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break; case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; - case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break; case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; - case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break; case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; @@ -84,7 +90,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; - case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break; case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; @@ -97,9 +103,9 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; - case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; - case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; - case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; @@ -107,11 +113,19 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { } // If R is null, the sub-method took care of registering the result. 
- if (R.getNode()) + if (R.getNode()) { SetSoftenedFloat(SDValue(N, ResNo), R); + ReplaceSoftenFloatResult(N, ResNo, R); + } + // Return true only if the node is changed, + // assuming that the operands are also converted when necessary. + // Otherwise, return false to tell caller to scan operands. + return R.getNode() && R.getNode() != N; } -SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); return BitConvertToInteger(N->getOperand(0)); } @@ -130,10 +144,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { BitConvertToInteger(N->getOperand(1))); } -SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { - return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), SDLoc(N), +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, we can load better from the constant pool. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); + ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N); + return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN), TLI.getTypeToTransformTo(*DAG.getContext(), - N->getValueType(0))); + CN->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -143,7 +161,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { NewOp, N->getOperand(1)); } -SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FABS can be implemented as native bitwise operations. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned Size = NVT.getSizeInBits(); @@ -165,7 +186,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { RTLIB::FMIN_F80, RTLIB::FMIN_F128, RTLIB::FMIN_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { @@ -178,7 +199,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { RTLIB::FMAX_F80, RTLIB::FMAX_F128, RTLIB::FMAX_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { @@ -191,7 +212,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { @@ -203,10 +224,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { RTLIB::CEIL_F80, RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations. 
+ if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(0)); SDValue RHS = BitConvertToInteger(N->getOperand(1)); SDLoc dl(N); @@ -263,7 +287,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { @@ -276,7 +300,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { @@ -288,7 +312,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { @@ -300,7 +324,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { @@ -312,7 +336,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { @@ -324,7 +348,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { @@ -336,7 +360,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { RTLIB::LOG2_F80, RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { @@ -348,7 +372,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -362,7 +386,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - NVT, Ops, 3, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { @@ -375,7 +399,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { @@ -387,10 +411,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { RTLIB::NEARBYINT_F80, RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FNEG can be implemented as native bitwise operations. 
+ if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); // Expand Y = FNEG(X) -> Y = SUB -0.0, X @@ -402,7 +429,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, dl).first; + NVT, Ops, false, dl).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -418,11 +445,20 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SoftenFloatResult(Op.getNode(), 0); } + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) { + Op = GetPromotedFloat(Op); + // If the promotion did the FP_EXTEND to the destination type for us, + // there's nothing left to do here. + if (Op.getValueType() == N->getValueType(0)) { + return BitConvertToInteger(Op); + } + } + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat) Op = GetSoftenedFloat(Op); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -430,7 +466,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32); SDValue Op = N->getOperand(0); - SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, &Op, 1, + SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op, false, SDLoc(N)).first; if (N->getValueType(0) == MVT::f32) return Res32; @@ -438,7 +474,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Res32, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, NVT, Res32, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -452,7 +488,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { @@ -465,7 +501,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -479,7 +515,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { @@ -492,7 +528,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { RTLIB::REM_F80, RTLIB::REM_F128, RTLIB::REM_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue 
DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { @@ -504,7 +540,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { RTLIB::RINT_F80, RTLIB::RINT_F128, RTLIB::RINT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { @@ -516,7 +552,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { RTLIB::ROUND_F80, RTLIB::ROUND_F128, RTLIB::ROUND_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { @@ -528,7 +564,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { @@ -540,7 +576,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { @@ -553,7 +589,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { @@ -568,10 +604,11 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { + bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo)); LoadSDNode *L = cast<LoadSDNode>(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -586,7 +623,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + if (N != NewL.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); return NewL; } @@ -600,17 +638,24 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { // Legalized the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); - return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL)); + auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL); + if (LegalInHWReg) + return ExtendNode; + return BitConvertToInteger(ExtendNode); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); return DAG.getSelect(SDLoc(N), LHS.getValueType(), N->getOperand(0), LHS, RHS); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(2)); SDValue RHS = GetSoftenedFloat(N->getOperand(3)); return DAG.getNode(ISD::SELECT_CC, SDLoc(N), @@ -636,7 +681,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { // Legalized the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); + if (N != NewVAARG.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); return NewVAARG; } @@ -665,12 +711,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { NVT, N->getOperand(0)); return TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - &Op, 1, Signed, dl).first; + Op, Signed, dl).first; } //===----------------------------------------------------------------------===// -// Operand Float to Integer Conversion.. +// Convert Float Operand to Integer for Non-HW-supported Operations. //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { @@ -680,6 +726,8 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: + if (CanSkipSoftenFloatOperand(N, OpNo)) + return false; #ifndef NDEBUG dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; @@ -691,18 +739,27 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; - case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break; - case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; - case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + case ISD::STORE: + Res = SoftenFloatOp_STORE(N, OpNo); + // Do not try to analyze or soften this node again if the value is + // or can be held in a register. In that case, Res.getNode() should + // be equal to N. + if (Res.getNode() == N && + isLegalInHWReg(N->getOperand(OpNo).getValueType())) + return false; + // Otherwise, we need to reanalyze and lower the new Res nodes. + break; } // If the result is null, the sub-method took care of registering results etc. if (!Res.getNode()) return false; // If the result is N, the sub-method updated N in place. Tell the legalizer - // core about this. 
+ // core about this to re-analyze. if (Res.getNode() == N) return true; @@ -713,6 +770,41 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return false; } +bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { + if (!isLegalInHWReg(N->getOperand(OpNo).getValueType())) + return false; + // When the operand type can be kept in registers, SoftenFloatResult + // will call ReplaceValueWith to replace all references and we can + // skip softening this operand. + switch (N->getOperand(OpNo).getOpcode()) { + case ISD::BITCAST: + case ISD::ConstantFP: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::FABS: + case ISD::FCOPYSIGN: + case ISD::FNEG: + case ISD::Register: + case ISD::SELECT: + case ISD::SELECT_CC: + return true; + } + // For some opcodes, SoftenFloatResult handles all conversion of softening + // and replacing operands, so that there is no need to soften operands + // again, although such opcode could be scanned for other illegal operands. + switch (N->getOpcode()) { + case ISD::ConstantFP: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::FABS: + case ISD::FCOPYSIGN: + case ISD::FNEG: + case ISD::Register: + return true; + } + return false; +} + SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), GetSoftenedFloat(N->getOperand(0))); @@ -730,7 +822,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; } @@ -747,7 +839,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -773,20 +865,33 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { 0); } -SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { + bool Signed = N->getOpcode() == ISD::FP_TO_SINT; + EVT SVT = N->getOperand(0).getValueType(); EVT RVT = N->getValueType(0); - RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; -} + EVT NVT = EVT(); + SDLoc dl(N); + + // If the result is not legal, eg: fp -> i1, then it needs to be promoted to + // a larger type, eg: fp -> i32. Even if it is legal, no libcall may exactly + // match, eg. we don't have fp -> i8 conversions. + // Look for an appropriate libcall. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE; + IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; + ++IntVT) { + NVT = (MVT::SimpleValueType)IntVT; + // The type needs to big enough to hold the result. + if (NVT.bitsGE(RVT)) + LC = Signed ? 
RTLIB::getFPTOSINT(SVT, NVT):RTLIB::getFPTOUINT(SVT, NVT); + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); -SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { - EVT RVT = N->getValueType(0); - RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; + SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, false, dl).first; + + // Truncate the result if the libcall returns a larger type. + return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); } SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { @@ -1028,7 +1133,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - N->getValueType(0), Ops, 2, false, + N->getValueType(0), Ops, false, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1102,7 +1207,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - N->getValueType(0), Ops, 3, false, + N->getValueType(0), Ops, false, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1116,7 +1221,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - N->getValueType(0), Ops, 2, false, + N->getValueType(0), Ops, false, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1231,7 +1336,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - N->getValueType(0), Ops, 2, false, + N->getValueType(0), Ops, false, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1310,7 +1415,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); - Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl).first; + Hi = TLI.makeLibCall(DAG, LC, VT, Src, true, dl).first; GetPairElements(Hi, Lo, Hi); } @@ -1341,6 +1446,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, break; } + // TODO: Are there fast-math-flags to propagate to this FADD? Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, APInt(128, Parts)), @@ -1494,7 +1600,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl).first; + return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { @@ -1511,6 +1617,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. + // TODO: Are there fast-math-flags to propagate to this FSUB? 
return DAG.getSelectCC(dl, N->getOperand(0), Tmp, DAG.getNode(ISD::ADD, dl, MVT::i32, DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, @@ -1527,7 +1634,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), false, dl).first; } @@ -1912,8 +2019,7 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Op0 = GetPromotedFloat(N->getOperand(0)); SDValue Op1 = GetPromotedFloat(N->getOperand(1)); - - return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, N->getFlags()); } SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 9f060a09a0f3..cd114d668e20 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -53,6 +53,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break; case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break; + case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break; case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break; case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break; case ISD::Constant: Res = PromoteIntRes_Constant(N); break; @@ -65,16 +66,20 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; - case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break; - case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));break; + case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break; + case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N)); + break; + case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N)); + break; case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; case ISD::SMIN: - case ISD::SMAX: + case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break; case ISD::UMIN: - case ISD::UMAX: Res = PromoteIntRes_SimpleIntBinOp(N); break; + case ISD::UMAX: Res = PromoteIntRes_ZExtIntBinOp(N); break; + case ISD::SHL: Res = PromoteIntRes_SHL(N); break; case ISD::SIGN_EXTEND_INREG: Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; @@ -114,10 +119,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; case ISD::SDIV: - case ISD::SREM: Res = PromoteIntRes_SDIV(N); break; + case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break; case ISD::UDIV: - case ISD::UREM: Res = PromoteIntRes_UDIV(N); break; + case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break; case ISD::SADDO: case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break; @@ -180,7 +185,7 @@ 
SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { N->getChain(), N->getBasePtr(), N->getMemOperand(), N->getOrdering(), N->getSynchScope()); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; @@ -193,7 +198,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(), N->getOrdering(), N->getSynchScope()); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; @@ -316,6 +321,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); } +SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + SDLoc dl(N); + + unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); + return DAG.getNode( + ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), + DAG.getConstant(DiffBits, dl, + TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); +} + SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { // The pair element type may be legal, or may not promote to the same type as // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. @@ -465,7 +483,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), N->getMemoryVT(), N->getMemOperand()); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; @@ -475,20 +493,34 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0()); - SDValue Mask = N->getMask(); - EVT NewMaskVT = getSetCCResultType(NVT); - if (NewMaskVT != N->getMask().getValueType()) - Mask = PromoteTargetBoolean(Mask, NewMaskVT); SDLoc dl(N); - SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), - Mask, ExtSrc0, N->getMemoryVT(), + N->getMask(), ExtSrc0, N->getMemoryVT(), N->getMemOperand(), ISD::SEXTLOAD); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue ExtSrc0 = GetPromotedInteger(N->getValue()); + assert(NVT == ExtSrc0.getValueType() && + "Gather result type and the passThru agrument type should be the same"); + + SDLoc dl(N); + SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(), + N->getIndex()}; + SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), + N->getMemoryVT(), dl, Ops, + N->getMemOperand()); + // Legalize the chain result - switch anything that used the old chain to // use the new one. 
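As a side note on the PromoteIntRes_BITREVERSE hunk above: it relies on the identity that reversing a value in a wider register and then shifting right by the width difference yields the narrow reversal. A minimal scalar sketch of that identity (the helper names are illustrative, not LLVM APIs):

#include <cstdint>
// Reverse all 32 bits; stands in for the BITREVERSE node on the promoted type.
static uint32_t rev32(uint32_t x) {
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i)
    r = (r << 1) | ((x >> i) & 1u);
  return r;
}
// Reversing an i8 by reversing its zero-extended i32 form and shifting right
// by DiffBits = 32 - 8 matches the SRL-of-BITREVERSE pattern emitted above.
uint8_t rev8_via_promote(uint8_t x) {
  const unsigned DiffBits = 32 - 8;
  return (uint8_t)(rev32(x) >> DiffBits);
}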
ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; } + /// Promote the overflow flag of an overflowing arithmetic node. SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { // Simply change the return type of the boolean result. @@ -534,14 +566,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { return Res; } -SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) { - // Sign extend the input. - SDValue LHS = SExtPromotedInteger(N->getOperand(0)); - SDValue RHS = SExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); -} - SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); @@ -629,6 +653,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { LHS.getValueType(), LHS, RHS); } +SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) { + // Sign extend the input. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) { + // Zero extend the input. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); @@ -770,14 +810,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { return Mul; } -SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { - // Zero extend the input. - SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); - SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); -} - SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); @@ -875,6 +907,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { OpNo); break; case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N), OpNo); break; + case ISD::MGATHER: Res = PromoteIntOp_MGATHER(cast<MaskedGatherSDNode>(N), + OpNo); break; + case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N), + OpNo); break; case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; @@ -1143,56 +1179,49 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getMemoryVT(), N->getMemOperand()); } -SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){ +SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, + unsigned OpNo) { SDValue DataOp = N->getValue(); EVT DataVT = DataOp.getValueType(); SDValue Mask = N->getMask(); - EVT MaskVT = Mask.getValueType(); SDLoc dl(N); bool TruncateStore = false; - if (!TLI.isTypeLegal(DataVT)) { - if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) { - DataOp = GetPromotedInteger(DataOp); - if (!TLI.isTypeLegal(MaskVT)) - Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); - TruncateStore = true; - } + if (OpNo == 2) { + // Mask comes before the data operand. 
If the data operand is legal, we just + // promote the mask. + // When the data operand has illegal type, we should legalize the data + // operand first. The mask will be promoted/splitted/widened according to + // the data operand type. + if (TLI.isTypeLegal(DataVT)) + Mask = PromoteTargetBoolean(Mask, DataVT); else { - assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector && - "Unexpected data legalization in MSTORE"); - DataOp = GetWidenedVector(DataOp); - - if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) - Mask = GetWidenedVector(Mask); - else { - EVT BoolVT = getSetCCResultType(DataOp.getValueType()); + if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) + return PromoteIntOp_MSTORE(N, 3); - // We can't use ModifyToType() because we should fill the mask with - // zeroes - unsigned WidenNumElts = BoolVT.getVectorNumElements(); - unsigned MaskNumElts = MaskVT.getVectorNumElements(); + else if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector) + return WidenVecOp_MSTORE(N, 3); - unsigned NumConcat = WidenNumElts / MaskNumElts; - SmallVector<SDValue, 16> Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); - Ops[0] = Mask; - for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = ZeroVal; - - Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); + else { + assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector); + return SplitVecOp_MSTORE(N, 3); } } + } else { // Data operand + assert(OpNo == 3 && "Unexpected operand for promotion"); + DataOp = GetPromotedInteger(DataOp); + Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); + TruncateStore = true; } - else - Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType()); + return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask, N->getMemoryVT(), N->getMemOperand(), TruncateStore); } -SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){ +SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, + unsigned OpNo) { assert(OpNo == 2 && "Only know how to promote the mask!"); EVT DataVT = N->getValueType(0); SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); @@ -1201,6 +1230,31 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo) return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, + unsigned OpNo) { + + SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); + if (OpNo == 2) { + // The Mask + EVT DataVT = N->getValueType(0); + NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else + NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, + unsigned OpNo) { + SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); + if (OpNo == 2) { + // The Mask + EVT DataVT = N->getValue().getValueType(); + NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else + NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op); @@ -1259,6 +1313,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break; case 
ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break; case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; + case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break; case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; case ISD::CTLZ_ZERO_UNDEF: @@ -1270,6 +1325,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; + case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break; case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break; case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break; case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break; @@ -1763,12 +1819,6 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); } -void DAGTypeLegalizer::ExpandIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo, - SDValue &Lo, SDValue &Hi) { - SDValue Res = DisintegrateMERGE_VALUES(N, ResNo); - SplitInteger(Res, Lo, Hi); -} - void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -1834,6 +1884,14 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, } } +void DAGTypeLegalizer::ExpandIntRes_BITREVERSE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands. + Lo = DAG.getNode(ISD::BITREVERSE, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::BITREVERSE, dl, Hi.getValueType(), Hi); +} + void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); @@ -1918,8 +1976,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, - dl).first, + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, true/*irrelevant*/, dl).first, Lo, Hi); } @@ -1934,8 +1991,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, - dl).first, + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, false/*irrelevant*/, dl).first, Lo, Hi); } @@ -2055,7 +2111,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } } - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. 
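For the ExpandIntRes_BITREVERSE hunk above, the swapped Hi/Lo operands encode the identity that a wide reversal is the reversal of each half with the halves exchanged. A scalar sketch under that assumption:

#include <cstdint>
static uint32_t rev32(uint32_t x) {
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i)
    r = (r << 1) | ((x >> i) & 1u);
  return r;
}
// Expand a 64-bit reversal into two 32-bit ones: the reversed low half becomes
// the new high half and vice versa, mirroring GetExpandedInteger(..., Hi, Lo).
uint64_t rev64_via_halves(uint64_t x) {
  uint32_t lo = (uint32_t)x;
  uint32_t hi = (uint32_t)(x >> 32);
  return ((uint64_t)rev32(lo) << 32) | rev32(hi);
}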
ReplaceValueWith(SDValue(N, 1), Ch); } @@ -2096,11 +2152,21 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, - dl).first, + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first, Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc DL(N); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other); + SDValue R = DAG.getNode(N->getOpcode(), DL, VTs, N->getOperand(0)); + Lo = R.getValue(0); + Hi = R.getValue(1); + ReplaceValueWith(SDValue(N, 1), R.getValue(2)); +} + void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, SDValue &Lo, SDValue &Hi) { SDValue LHS = Node->getOperand(0); @@ -2166,7 +2232,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, LC = RTLIB::SDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, @@ -2261,8 +2327,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl).first, Lo, - Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi); return; } @@ -2352,7 +2417,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, LC = RTLIB::SREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, @@ -2499,7 +2564,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, LC = RTLIB::UDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, @@ -2525,7 +2590,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, LC = RTLIB::UREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, @@ -2605,6 +2670,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; + case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break; case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; @@ -2732,6 +2798,47 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, return; } + if (LHSHi == RHSHi) { + // Comparing the low bits is enough. 
+ NewLHS = Tmp1; + NewRHS = SDValue(); + return; + } + + // Lower with SETCCE if the target supports it. + // FIXME: Make all targets support this, then remove the other lowering. + if (TLI.getOperationAction( + ISD::SETCCE, + TLI.getTypeToExpandTo(*DAG.getContext(), LHSLo.getValueType())) == + TargetLowering::Custom) { + // SETCCE can detect < and >= directly. For > and <=, flip operands and + // condition code. + bool FlipOperands = false; + switch (CCCode) { + case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break; + case ISD::SETUGT: CCCode = ISD::SETULT; FlipOperands = true; break; + case ISD::SETLE: CCCode = ISD::SETGE; FlipOperands = true; break; + case ISD::SETULE: CCCode = ISD::SETUGE; FlipOperands = true; break; + default: break; + } + if (FlipOperands) { + std::swap(LHSLo, RHSLo); + std::swap(LHSHi, RHSHi); + } + // Perform a wide subtraction, feeding the carry from the low part into + // SETCCE. The SETCCE operation is essentially looking at the high part of + // the result of LHS - RHS. It is negative iff LHS < RHS. It is zero or + // positive iff LHS >= RHS. + SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); + SDValue LowCmp = DAG.getNode(ISD::SUBC, dl, VTList, LHSLo, RHSLo); + SDValue Res = + DAG.getNode(ISD::SETCCE, dl, getSetCCResultType(LHSLo.getValueType()), + LHSHi, RHSHi, LowCmp.getValue(1), DAG.getCondCode(CCCode)); + NewLHS = Res; + NewRHS = SDValue(); + return; + } + NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETEQ, false, DagCombineInfo, dl); @@ -2796,6 +2903,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { DAG.getCondCode(CCCode)), 0); } +SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + SDLoc dl = SDLoc(N); + + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedInteger(LHS, LHSLo, LHSHi); + GetExpandedInteger(RHS, RHSLo, RHSHi); + + // Expand to a SUBE for the low part and a smaller SETCCE for the high. + SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); + SDValue LowCmp = DAG.getNode(ISD::SUBE, dl, VTList, LHSLo, RHSLo, Carry); + return DAG.getNode(ISD::SETCCE, dl, N->getValueType(0), LHSHi, RHSHi, + LowCmp.getValue(1), Cond); +} + SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { // The value being shifted is legal, but the shift amount is too big. // It follows that either the result of the shift is undefined, or the @@ -2820,7 +2945,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, DstVT, Op, true, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2980,11 +3105,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // Load the value out, extending it from f32 to the destination float type. // FIXME: Avoid the extend by constructing the right constant pool? 
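A scalar model of the SETCCE comparison lowering added above, assuming an unsigned less-than on a value expanded into 64-bit halves; the function below is illustrative only, not DAG code:

#include <cstdint>
// Compare (lhi:llo) < (rhi:rlo) by subtracting the low halves (SUBC) and
// feeding the borrow into the high-half comparison (SETCCE): the result is
// true exactly when the full wide subtraction would borrow.
bool ult128(uint64_t lhi, uint64_t llo, uint64_t rhi, uint64_t rlo) {
  bool borrow = llo < rlo;                    // borrow out of LHSLo - RHSLo
  return lhi < rhi || (lhi == rhi && borrow); // borrow out of the high part
}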
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), - FudgePtr, - MachinePointerInfo::getConstantPool(), - MVT::f32, - false, false, false, Alignment); + SDValue Fudge = DAG.getExtLoad( + ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, + false, false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); } @@ -2992,7 +3116,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl).first; + return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first; } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 54cfaf570619..2a0b0aa44794 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -73,21 +73,20 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { // (for example because it was created but not used). In general, we cannot // distinguish between new nodes and deleted nodes. SmallVector<SDNode*, 16> NewNodes; - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) { + for (SDNode &Node : DAG.allnodes()) { // Remember nodes marked NewNode - they are subject to extra checking below. - if (I->getNodeId() == NewNode) - NewNodes.push_back(I); + if (Node.getNodeId() == NewNode) + NewNodes.push_back(&Node); - for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) { - SDValue Res(I, i); + for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) { + SDValue Res(&Node, i); bool Failed = false; unsigned Mapped = 0; if (ReplacedValues.find(Res) != ReplacedValues.end()) { Mapped |= 1; // Check that remapped values are only used by nodes marked NewNode. - for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end(); + for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end(); UI != UE; ++UI) if (UI.getUse().getResNo() == i) assert(UI->getNodeId() == NewNode && @@ -119,16 +118,16 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { if (WidenedVectors.find(Res) != WidenedVectors.end()) Mapped |= 128; - if (I->getNodeId() != Processed) { + if (Node.getNodeId() != Processed) { // Since we allow ReplacedValues to map deleted nodes, it may map nodes // marked NewNode too, since a deleted node may have been reallocated as // another node that has not been seen by the LegalizeTypes machinery. - if ((I->getNodeId() == NewNode && Mapped > 1) || - (I->getNodeId() != NewNode && Mapped != 0)) { + if ((Node.getNodeId() == NewNode && Mapped > 1) || + (Node.getNodeId() != NewNode && Mapped != 0)) { dbgs() << "Unprocessed value in a map!"; Failed = true; } - } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) { + } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) { if (Mapped > 1) { dbgs() << "Value with legal type was transformed!"; Failed = true; @@ -194,13 +193,12 @@ bool DAGTypeLegalizer::run() { // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess' // (and remembering them) if they are leaves and assigning 'Unanalyzed' if // non-leaves. 
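The ExpandIntOp_UINT_TO_FP context just above uses the classic fudge-factor trick: convert the bits as a signed integer, then add back 2^N when the sign bit was set. A hedged scalar model of that idea (it shares the same double-rounding caveat as the DAG sequence; the name is illustrative):

#include <cstdint>
double u64_to_f64_fudged(uint64_t x) {
  double d = (double)(int64_t)x;        // SINT_TO_FP of the same bits
  if ((int64_t)x < 0)
    d += 18446744073709551616.0;        // + 2^64, the constant-pool "fudge"
  return d;
}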
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) { - if (I->getNumOperands() == 0) { - I->setNodeId(ReadyToProcess); - Worklist.push_back(I); + for (SDNode &Node : DAG.allnodes()) { + if (Node.getNumOperands() == 0) { + Node.setNodeId(ReadyToProcess); + Worklist.push_back(&Node); } else { - I->setNodeId(Unanalyzed); + Node.setNodeId(Unanalyzed); } } @@ -240,9 +238,13 @@ bool DAGTypeLegalizer::run() { Changed = true; goto NodeDone; case TargetLowering::TypeSoftenFloat: - SoftenFloatResult(N, i); - Changed = true; - goto NodeDone; + Changed = SoftenFloatResult(N, i); + if (Changed) + goto NodeDone; + // If not changed, the result type should be legally in register. + assert(isLegalInHWReg(ResultVT) && + "Unchanged SoftenFloatResult should be legal in register!"); + goto ScanOperands; case TargetLowering::TypeExpandFloat: ExpandFloatResult(N, i); Changed = true; @@ -409,40 +411,48 @@ NodeDone: // In a debug build, scan all the nodes to make sure we found them all. This // ensures that there are no cycles and that everything got processed. #ifndef NDEBUG - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) { + for (SDNode &Node : DAG.allnodes()) { bool Failed = false; // Check that all result types are legal. - if (!IgnoreNodeResults(I)) - for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i) - if (!isTypeLegal(I->getValueType(i))) { - dbgs() << "Result type " << i << " illegal!\n"; + // A value type is illegal if its TypeAction is not TypeLegal, + // and TLI.RegClassForVT does not have a register class for this type. + // For example, the x86_64 target has f128 that is not TypeLegal, + // to have softened operators, but it also has FR128 register class to + // pass and return f128 values. Hence a legalized node can have f128 type. + if (!IgnoreNodeResults(&Node)) + for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i) + if (!isTypeLegal(Node.getValueType(i)) && + !TLI.isTypeLegal(Node.getValueType(i))) { + dbgs() << "Result type " << i << " illegal: "; + Node.dump(); Failed = true; } // Check that all operand types are legal. 
- for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i) - if (!IgnoreNodeResults(I->getOperand(i).getNode()) && - !isTypeLegal(I->getOperand(i).getValueType())) { - dbgs() << "Operand type " << i << " illegal!\n"; + for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i) + if (!IgnoreNodeResults(Node.getOperand(i).getNode()) && + !isTypeLegal(Node.getOperand(i).getValueType()) && + !TLI.isTypeLegal(Node.getOperand(i).getValueType())) { + dbgs() << "Operand type " << i << " illegal: "; + Node.getOperand(i).dump(); Failed = true; } - if (I->getNodeId() != Processed) { - if (I->getNodeId() == NewNode) + if (Node.getNodeId() != Processed) { + if (Node.getNodeId() == NewNode) dbgs() << "New node not analyzed?\n"; - else if (I->getNodeId() == Unanalyzed) + else if (Node.getNodeId() == Unanalyzed) dbgs() << "Unanalyzed node not noticed?\n"; - else if (I->getNodeId() > 0) + else if (Node.getNodeId() > 0) dbgs() << "Operand not processed?\n"; - else if (I->getNodeId() == ReadyToProcess) + else if (Node.getNodeId() == ReadyToProcess) dbgs() << "Not added to worklist?\n"; Failed = true; } if (Failed) { - I->dump(&DAG); dbgs() << "\n"; + Node.dump(&DAG); dbgs() << "\n"; llvm_unreachable(nullptr); } } @@ -751,13 +761,23 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - assert(Result.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + // f128 of x86_64 could be kept in SSE registers, + // but sometimes softened to i128. + assert((Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) || + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && "Invalid type for softened float"); AnalyzeNewValue(Result); SDValue &OpEntry = SoftenedFloats[Op]; - assert(!OpEntry.getNode() && "Node is already converted to integer!"); + // Allow repeated calls to save f128 type nodes + // or any node with type that transforms to itself. + // Many operations on these types are not softened. + assert((!OpEntry.getNode()|| + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && + "Node is already converted to integer!"); OpEntry = Result; } @@ -1042,23 +1062,22 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, unsigned NumOps = N->getNumOperands(); SDLoc dl(N); if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), nullptr, 0, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned, dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned, dl).first; } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first; } SmallVector<SDValue, 8> Ops(NumOps); for (unsigned i = 0; i < NumOps; ++i) Ops[i] = N->getOperand(i); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), - &Ops[0], NumOps, isSigned, dl).first; + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first; } // ExpandChainLibCall - Expand a node into a call to a libcall. 
Similar to @@ -1108,6 +1127,23 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) { return DAG.getNode(ExtendCode, dl, BoolVT, Bool); } +/// WidenTargetBoolean - Widen the given target boolean to a target boolean +/// of the given type. The boolean vector is widened and then promoted to match +/// the target boolean type of the given ValVT. +SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT, + bool WithZeroes) { + SDLoc dl(Bool); + EVT BoolVT = Bool.getValueType(); + + assert(ValVT.getVectorNumElements() > BoolVT.getVectorNumElements() && + TLI.isTypeLegal(ValVT) && + "Unexpected types in WidenTargetBoolean"); + EVT WideVT = EVT::getVectorVT(*DAG.getContext(), BoolVT.getScalarType(), + ValVT.getVectorNumElements()); + Bool = ModifyToType(Bool, WideVT, WithZeroes); + return PromoteTargetBoolean(Bool, ValVT); +} + /// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT /// bits in Hi. void DAGTypeLegalizer::SplitInteger(SDValue Op, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d1131a74cf17..8ba19f76797f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -72,6 +72,20 @@ private: return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal; } + /// isSimpleLegalType - Return true if this is a simple legal type. + bool isSimpleLegalType(EVT VT) const { + return VT.isSimple() && TLI.isTypeLegal(VT); + } + + /// isLegalInHWReg - Return true if this type can be passed in registers. + /// For example, x86_64's f128, should to be legally in registers + /// and only some operations converted to library calls or integer + /// bitwise operations. + bool isLegalInHWReg(EVT VT) const { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return VT == NVT && isSimpleLegalType(VT); + } + EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } @@ -173,6 +187,11 @@ private: std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT); + + /// Modify Bit Vector to match SetCC result type of ValVT. + /// The bit vector is widened with zeroes when WithZeroes is true. 
+ SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false); + void ReplaceValueWith(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, @@ -234,6 +253,7 @@ private: SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntRes_BITCAST(SDNode *N); SDValue PromoteIntRes_BSWAP(SDNode *N); + SDValue PromoteIntRes_BITREVERSE(SDNode *N); SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); SDValue PromoteIntRes_Constant(SDNode *N); SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N); @@ -246,21 +266,22 @@ private: SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N); + SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N); SDValue PromoteIntRes_Overflow(SDNode *N); SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); - SDValue PromoteIntRes_SDIV(SDNode *N); SDValue PromoteIntRes_SELECT(SDNode *N); SDValue PromoteIntRes_VSELECT(SDNode *N); SDValue PromoteIntRes_SELECT_CC(SDNode *N); SDValue PromoteIntRes_SETCC(SDNode *N); SDValue PromoteIntRes_SHL(SDNode *N); SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); + SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N); + SDValue PromoteIntRes_SExtIntBinOp(SDNode *N); SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); SDValue PromoteIntRes_SRA(SDNode *N); SDValue PromoteIntRes_SRL(SDNode *N); SDValue PromoteIntRes_TRUNCATE(SDNode *N); SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); - SDValue PromoteIntRes_UDIV(SDNode *N); SDValue PromoteIntRes_UNDEF(SDNode *N); SDValue PromoteIntRes_VAARG(SDNode *N); SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); @@ -276,7 +297,6 @@ private: SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); - SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N); SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); @@ -284,7 +304,6 @@ private: SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); - SDValue PromoteIntOp_VSETCC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_Shift(SDNode *N); SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); @@ -294,6 +313,8 @@ private: SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -312,8 +333,6 @@ private: // Integer Result Expansion. 
void ExpandIntegerResult(SDNode *N, unsigned ResNo); - void ExpandIntRes_MERGE_VALUES (SDNode *N, unsigned ResNo, - SDValue &Lo, SDValue &Hi); void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -322,6 +341,7 @@ private: void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -333,6 +353,7 @@ private: void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -354,12 +375,10 @@ private: // Integer Operand Expansion. bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); - SDValue ExpandIntOp_BITCAST(SDNode *N); SDValue ExpandIntOp_BR_CC(SDNode *N); - SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N); - SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N); SDValue ExpandIntOp_SELECT_CC(SDNode *N); SDValue ExpandIntOp_SETCC(SDNode *N); + SDValue ExpandIntOp_SETCCE(SDNode *N); SDValue ExpandIntOp_Shift(SDNode *N); SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); @@ -375,32 +394,48 @@ private: // Float to Integer Conversion Support: LegalizeFloatTypes.cpp //===--------------------------------------------------------------------===// - /// GetSoftenedFloat - Given a processed operand Op which was converted to an - /// integer of the same size, this returns the integer. The integer contains - /// exactly the same bits as Op - only the type changed. For example, if Op - /// is an f32 which was softened to an i32, then this method returns an i32, - /// the bits of which coincide with those of Op. + /// GetSoftenedFloat - Given an operand Op of Float type, returns the integer + /// if the Op is not supported in target HW and converted to the integer. + /// The integer contains exactly the same bits as Op - only the type changed. + /// For example, if Op is an f32 which was softened to an i32, then this method + /// returns an i32, the bits of which coincide with those of Op. + /// If the Op can be efficiently supported in target HW or the operand must + /// stay in a register, the Op is not converted to an integer. + /// In that case, the given op is returned. SDValue GetSoftenedFloat(SDValue Op) { SDValue &SoftenedOp = SoftenedFloats[Op]; + if (!SoftenedOp.getNode() && + isSimpleLegalType(Op.getValueType())) + return Op; RemapValue(SoftenedOp); assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); return SoftenedOp; } void SetSoftenedFloat(SDValue Op, SDValue Result); - // Result Float to Integer Conversion. - void SoftenFloatResult(SDNode *N, unsigned OpNo); + // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary. 
+ void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) { + // When the result type can be kept in HW registers, the converted + // NewRes node could have the same type. We can save the effort in + // cloning every user of N in SoftenFloatOperand or other legalization functions, + // by calling ReplaceValueWith here to update all users. + if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo))) + ReplaceValueWith(SDValue(N, ResNo), NewRes); + } + + // Convert Float Results to Integer for Non-HW-supported Operations. + bool SoftenFloatResult(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_BITCAST(SDNode *N); + SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); - SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); + SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); - SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); - SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FCOS(SDNode *N); SDValue SoftenFloatRes_FDIV(SDNode *N); SDValue SoftenFloatRes_FEXP(SDNode *N); @@ -412,7 +447,7 @@ private: SDValue SoftenFloatRes_FMA(SDNode *N); SDValue SoftenFloatRes_FMUL(SDNode *N); SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); - SDValue SoftenFloatRes_FNEG(SDNode *N); + SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N); SDValue SoftenFloatRes_FP_ROUND(SDNode *N); @@ -425,21 +460,25 @@ private: SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); SDValue SoftenFloatRes_FTRUNC(SDNode *N); - SDValue SoftenFloatRes_LOAD(SDNode *N); - SDValue SoftenFloatRes_SELECT(SDNode *N); - SDValue SoftenFloatRes_SELECT_CC(SDNode *N); + SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_UNDEF(SDNode *N); SDValue SoftenFloatRes_VAARG(SDNode *N); SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); - // Operand Float to Integer Conversion. + // Return true if we can skip softening the given operand or SDNode because + // it was soften before by SoftenFloatResult and references to the operand + // were replaced by ReplaceValueWith. + bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo); + + // Convert Float Operand to Integer for Non-HW-supported Operations. 
bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); - SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); - SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N); + SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); @@ -575,7 +614,6 @@ private: SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); - SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); @@ -617,20 +655,18 @@ private: void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi); @@ -650,6 +686,7 @@ private: SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); + SDValue SplitVecOp_FCOPYSIGN(SDNode *N); //===--------------------------------------------------------------------===// // Vector Widening Support: LegalizeVectorTypes.cpp @@ -680,8 +717,8 @@ private: SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); + SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N); SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N); - SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N); SDValue WidenVecRes_SELECT(SDNode* N); SDValue WidenVecRes_SELECT_CC(SDNode* N); SDValue WidenVecRes_SETCC(SDNode* N); @@ -693,6 +730,7 @@ private: SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); + SDValue WidenVecRes_FCOPYSIGN(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); @@ -707,9 +745,11 @@ private: SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); + SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_SETCC(SDNode* N); SDValue WidenVecOp_Convert(SDNode *N); + SDValue 
WidenVecOp_FCOPYSIGN(SDNode *N); //===--------------------------------------------------------------------===// // Vector Widening Utilities Support: LegalizeVectorTypes.cpp @@ -745,8 +785,10 @@ private: /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. - SDValue ModifyToType(SDValue InOp, EVT WidenVT); - + /// When FillWithZeroes is "on" the vector will be widened with + /// zeroes. + /// By default, the vector will be widened with undefined values. + SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false); //===--------------------------------------------------------------------===// // Generic Splitting: LegalizeTypesGeneric.cpp diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 14d8f7762086..593c346df770 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -53,12 +53,17 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypePromoteFloat: llvm_unreachable("Bitcast of a promotion-needing float should never need" "expansion"); - case TargetLowering::TypeSoftenFloat: - // Convert the integer operand instead. - SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); + case TargetLowering::TypeSoftenFloat: { + // Expand the floating point operand only if it was converted to integers. + // Otherwise, it is a legal type like f128 that can be saved in a register. + auto SoftenedOp = GetSoftenedFloat(InOp); + if (SoftenedOp == InOp) + break; + SplitInteger(SoftenedOp, Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; + } case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: { auto &DL = DAG.getDataLayout(); @@ -161,7 +166,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { NOutVT.getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); // Emit a store to the stack slot. SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 83d4ad5ea1f4..f61f631e2ff8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -105,6 +105,8 @@ class VectorLegalizer { SDValue ExpandLoad(SDValue Op); SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); + SDValue ExpandBITREVERSE(SDValue Op); + SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op); /// \brief Implements vector promotion. /// @@ -159,7 +161,7 @@ bool VectorLegalizer::Run() { DAG.AssignTopologicalOrder(); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) - LegalizeOp(SDValue(I, 0)); + LegalizeOp(SDValue(&*I, 0)); // Finally, it's possible the root changed. Get the new root. 
SDValue OldRoot = DAG.getRoot(); @@ -218,9 +220,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { assert(Result.getValue(1).use_empty() && "There are still live users of the old chain!"); return LegalizeOp(Lowered); - } else { - return TranslateLegalizeResults(Op, Lowered); } + return TranslateLegalizeResults(Op, Lowered); } case TargetLowering::Expand: Changed = true; @@ -231,7 +232,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { EVT StVT = ST->getMemoryVT(); MVT ValVT = ST->getValue().getSimpleValueType(); if (StVT.isVector() && ST->isTruncatingStore()) - switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) { + switch (TLI.getTruncStoreAction(ValVT, StVT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: return TranslateLegalizeResults(Op, Result); @@ -244,7 +245,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Changed = true; return LegalizeOp(ExpandStore(Op)); } - } else if (Op.getOpcode() == ISD::MSCATTER) + } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE) HasVectorValue = true; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); @@ -265,6 +266,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::UDIV: case ISD::SREM: case ISD::UREM: + case ISD::SDIVREM: + case ISD::UDIVREM: case ISD::FADD: case ISD::FSUB: case ISD::FMUL: @@ -279,6 +282,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::ROTL: case ISD::ROTR: case ISD::BSWAP: + case ISD::BITREVERSE: case ISD::CTLZ: case ISD::CTTZ: case ISD::CTLZ_ZERO_UNDEF: @@ -298,6 +302,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FABS: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: case ISD::FCOPYSIGN: case ISD::FSQRT: case ISD::FSIN: @@ -338,9 +344,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::MSCATTER: QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType(); break; + case ISD::MSTORE: + QueryType = cast<MaskedStoreSDNode>(Node)->getValue().getValueType(); + break; } switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Promote: Result = Promote(Op); Changed = true; @@ -411,7 +421,7 @@ SDValue VectorLegalizer::Promote(SDValue Op) { Operands[j] = Op.getOperand(j); } - Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands); + Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags()); if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) @@ -708,6 +718,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) { return ExpandFNEG(Op); case ISD::SETCC: return UnrollVSETCC(Op); + case ISD::BITREVERSE: + return ExpandBITREVERSE(Op); + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: + return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op); default: return DAG.UnrollVectorOp(Op.getNode()); } @@ -893,6 +908,25 @@ SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { return DAG.getNode(ISD::BITCAST, DL, VT, Op); } +SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { + EVT VT = Op.getValueType(); + + // If we have the scalar operation, it's probably cheaper to unroll it. 
+ if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) + return DAG.UnrollVectorOp(Op.getNode()); + + // If we have the appropriate vector bit operations, it is better to use them + // than unrolling and expanding each component. + if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) || + !TLI.isOperationLegalOrCustom(ISD::SRL, VT) || + !TLI.isOperationLegalOrCustom(ISD::AND, VT) || + !TLI.isOperationLegalOrCustom(ISD::OR, VT)) + return DAG.UnrollVectorOp(Op.getNode()); + + // Let LegalizeDAG handle this later. + return Op; +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. @@ -971,6 +1005,7 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { // Convert hi and lo to floats // Convert the hi part back to the upper values + // TODO: Can any fast-math-flags be set on these nodes? SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI); fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW); SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); @@ -984,12 +1019,23 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { SDLoc DL(Op); SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType()); + // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB. return DAG.getNode(ISD::FSUB, DL, Op.getValueType(), Zero, Op.getOperand(0)); } return DAG.UnrollVectorOp(Op.getNode()); } +SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) { + // If the non-ZERO_UNDEF version is supported we can let LegalizeDAG handle. + unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ; + if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) + return Op; + + // Otherwise go ahead and unroll. 
+ return DAG.UnrollVectorOp(Op.getNode()); +} + SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { EVT VT = Op.getValueType(); unsigned NumElems = VT.getVectorNumElements(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 51cd6619f783..d0187d36dee2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -67,6 +67,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; case ISD::ANY_EXTEND: + case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: @@ -108,6 +109,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMUL: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: case ISD::FPOW: case ISD::FREM: @@ -139,7 +146,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); + LHS.getValueType(), LHS, RHS, N->getFlags()); } SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { @@ -228,7 +235,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->isInvariant(), N->getOriginalAlignment(), N->getAAInfo()); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. 
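On the ExpandCTLZ_CTTZ_ZERO_UNDEF hunk above: the ZERO_UNDEF forms only have defined behaviour for non-zero inputs, so whenever the plain CTLZ/CTTZ is legal or custom it can stand in directly. A small sketch of why that substitution is safe (a portable loop stands in for the target instruction):

#include <cstdint>
unsigned ctlz32(uint32_t x) {           // total: returns 32 for x == 0
  unsigned n = 0;
  for (uint32_t bit = 1u << 31; bit != 0 && (x & bit) == 0; bit >>= 1)
    ++n;
  return n;
}
unsigned ctlz32_zero_undef(uint32_t x) {
  // Precondition x != 0; the general ctlz agrees on every such input.
  return ctlz32(x);
}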
ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); return Result; @@ -594,6 +601,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; + case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; @@ -613,6 +621,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; + case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: @@ -656,11 +665,12 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SUB: case ISD::MUL: case ISD::FADD: - case ISD::FCOPYSIGN: case ISD::FSUB: case ISD::FMUL: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: case ISD::SDIV: case ISD::UDIV: case ISD::FDIV: @@ -698,8 +708,10 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, GetSplitVector(N->getOperand(1), RHSLo, RHSHi); SDLoc dl(N); - Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo); - Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); + const SDNodeFlags *Flags = N->getFlags(); + unsigned Opcode = N->getOpcode(); + Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); + Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); } void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, @@ -870,6 +882,25 @@ void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); } +void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDLoc DL(N); + + SDValue RHSLo, RHSHi; + SDValue RHS = N->getOperand(1); + EVT RHSVT = RHS.getValueType(); + if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector) + GetSplitVector(RHS, RHSLo, RHSHi); + else + std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS)); + + + Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo); + Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi); +} + void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; @@ -989,7 +1020,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(LD, 1), Ch); } @@ -1003,6 +1034,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue Ch = MLD->getChain(); SDValue Ptr = MLD->getBasePtr(); SDValue Mask = MLD->getMask(); + SDValue Src0 = MLD->getSrc0(); unsigned Alignment = MLD->getOriginalAlignment(); ISD::LoadExtType ExtType = MLD->getExtensionType(); @@ -1012,16 +1044,22 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? 
Alignment/2 : Alignment; + // Split Mask operand SDValue MaskLo, MaskHi; - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); EVT MemoryVT = MLD->getMemoryVT(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - SDValue Src0 = MLD->getSrc0(); SDValue Src0Lo, Src0Hi; - std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); + if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Src0, Src0Lo, Src0Hi); + else + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MLD->getPointerInfo(), @@ -1049,7 +1087,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(MLD, 1), Ch); @@ -1064,20 +1102,33 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue Ch = MGT->getChain(); SDValue Ptr = MGT->getBasePtr(); SDValue Mask = MGT->getMask(); + SDValue Src0 = MGT->getValue(); + SDValue Index = MGT->getIndex(); unsigned Alignment = MGT->getOriginalAlignment(); + // Split Mask operand SDValue MaskLo, MaskHi; - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); EVT MemoryVT = MGT->getMemoryVT(); EVT LoMemVT, HiMemVT; + // Split MemoryVT std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue Src0Lo, Src0Hi; - std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl); + if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Src0, Src0Lo, Src0Hi); + else + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); SDValue IndexHi, IndexLo; - std::tie(IndexLo, IndexHi) = DAG.SplitVector(MGT->getIndex(), dl); + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); + else + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MGT->getPointerInfo(), @@ -1097,7 +1148,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(MGT, 1), Ch); } @@ -1357,6 +1408,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { Res = SplitVecOp_TruncateHelper(N); break; case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; + case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; @@ -1567,23 +1619,31 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue Ptr = MGT->getBasePtr(); SDValue Index = MGT->getIndex(); SDValue Mask = MGT->getMask(); + SDValue Src0 = MGT->getValue(); unsigned Alignment = MGT->getOriginalAlignment(); SDValue MaskLo, MaskHi; - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + // Split Mask operand + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); EVT MemoryVT = MGT->getMemoryVT(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue Src0Lo, Src0Hi; - std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl); + if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Src0, Src0Lo, Src0Hi); + else + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); SDValue IndexHi, IndexLo; - if (Index.getNode()) - std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); else - IndexLo = IndexHi = Index; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MGT->getPointerInfo(), @@ -1609,7 +1669,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(MGT, 1), Ch); @@ -1633,9 +1693,21 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue DataLo, DataHi; - GetSplitVector(Data, DataLo, DataHi); + if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) + // Split Data operand + GetSplitVector(Data, DataLo, DataHi); + else + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + SDValue MaskLo, MaskHi; - GetSplitVector(Mask, MaskLo, MaskHi); + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + // Split Mask operand + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + + MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType()); + MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType()); // if Alignment is equal to the vector size, // take the half of it for the second part @@ -1680,25 +1752,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned Alignment = N->getOriginalAlignment(); SDLoc DL(N); + // Split all operands EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue DataLo, DataHi; - GetSplitVector(Data, DataLo, DataHi); - SDValue MaskLo, MaskHi; - GetSplitVector(Mask, MaskLo, MaskHi); + if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) + // Split Data operand + GetSplitVector(Data, DataLo, DataHi); + else + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); - SDValue PtrLo, PtrHi; - if (Ptr.getValueType().isVector()) // gather form vector of pointers - std::tie(PtrLo, PtrHi) = DAG.SplitVector(Ptr, DL); + SDValue MaskLo, MaskHi; + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + // Split Mask operand + GetSplitVector(Mask, MaskLo, MaskHi); else - PtrLo = PtrHi = Ptr; + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); SDValue IndexHi, IndexLo; - if (Index.getNode()) - std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); else - IndexLo = IndexHi = Index; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); SDValue Lo, Hi; MachineMemOperand *MMO = DAG.getMachineFunction(). @@ -1706,7 +1782,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - SDValue OpsLo[] = {Ch, DataLo, MaskLo, PtrLo, IndexLo}; + SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo}; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), DL, OpsLo, MMO); @@ -1715,7 +1791,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - SDValue OpsHi[] = {Ch, DataHi, MaskHi, PtrHi, IndexHi}; + SDValue OpsHi[] = {Ch, DataHi, MaskHi, Ptr, IndexHi}; Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), DL, OpsHi, MMO); @@ -1891,6 +1967,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); } +SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) { + // The result (and the first input) has a legal vector type, but the second + // input needs splitting. 
+ return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements()); +} //===----------------------------------------------------------------------===// @@ -1938,6 +2019,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::MLOAD: Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N)); break; + case ISD::MGATHER: + Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N)); + break; case ISD::ADD: case ISD::AND: @@ -1949,11 +2033,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::XOR: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: Res = WidenVecRes_Binary(N); break; case ISD::FADD: - case ISD::FCOPYSIGN: case ISD::FMUL: case ISD::FPOW: case ISD::FSUB: @@ -1966,6 +2055,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_BinaryCanTrap(N); break; + case ISD::FCOPYSIGN: + Res = WidenVecRes_FCOPYSIGN(N); + break; + case ISD::FPOWI: Res = WidenVecRes_POWI(N); break; @@ -1989,6 +2082,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Convert(N); break; + case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: case ISD::CTPOP: @@ -2037,7 +2131,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); } SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { @@ -2048,6 +2142,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); + const SDNodeFlags *Flags = N->getFlags(); while (!TLI.isTypeLegal(VT) && NumElts != 1) { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); @@ -2057,7 +2152,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { // Operation doesn't trap so just widen as normal. SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags); } // No legal vector version so unroll the vector operation and then widen. 
@@ -2087,7 +2182,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { SDValue EOp2 = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags); Idx += NumElts; CurNumElts -= NumElts; } @@ -2105,7 +2200,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, - EOp1, EOp2); + EOp1, EOp2, Flags); } CurNumElts = 0; } @@ -2195,7 +2290,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned Opcode = N->getOpcode(); unsigned InVTNumElts = InVT.getVectorNumElements(); - + const SDNodeFlags *Flags = N->getFlags(); if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); @@ -2203,7 +2298,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { if (InVTNumElts == WidenNumElts) { if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InOp); - return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1)); + return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); } } @@ -2224,7 +2319,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVec); - return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1)); + return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags); } if (InVTNumElts % WidenNumElts == 0) { @@ -2234,7 +2329,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // Extract the input and convert the shorten input vector. if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVal); - return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1)); + return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags); } } @@ -2250,7 +2345,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { if (N->getNumOperands() == 1) Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); else - Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1)); + Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags); } SDValue UndefVal = DAG.getUNDEF(EltVT); @@ -2260,6 +2355,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops); } +SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { + // If this is an FCOPYSIGN with same input types, we can treat it as a + // normal (can trap) binary op. + if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType()) + return WidenVecRes_BinaryCanTrap(N); + + // If the types are different, fall back to unrolling. 
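When the two FCOPYSIGN operands have different element types there is no single widened node to emit, so the operation is expanded one lane at a time. A minimal standalone sketch of what that per-lane expansion computes, with illustrative values and plain C++ types rather than SDValues (the cast of the sign operand is a simplification of this sketch):

#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

int main() {
  // Lane-by-lane copysign: each result lane keeps the magnitude of Mag[i]
  // and takes the sign of Sign[i], including the sign of -0.0.
  std::vector<float>  Mag  = {1.0f, 2.0f, 3.0f, 4.0f};
  std::vector<double> Sign = {-1.0, 1.0, -0.0, 5.0};
  std::vector<float>  Res(Mag.size());
  for (std::size_t i = 0; i != Mag.size(); ++i)
    Res[i] = std::copysign(Mag[i], static_cast<float>(Sign[i]));
  assert(Res[0] == -1.0f && Res[1] == 2.0f && Res[3] == 4.0f);
  assert(std::signbit(Res[2]) && Res[2] == -3.0f);
  return 0;
}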
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); +} + SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); @@ -2669,7 +2775,35 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), Mask, Src0, N->getMemoryVT(), N->getMemOperand(), ExtType); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { + + EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Mask = N->getMask(); + SDValue Src0 = GetWidenedVector(N->getValue()); + unsigned NumElts = WideVT.getVectorNumElements(); + SDLoc dl(N); + + // The mask should be widened as well + Mask = WidenTargetBoolean(Mask, WideVT, true); + + // Widen the Index operand + SDValue Index = N->getIndex(); + EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), + Index.getValueType().getScalarType(), + NumElts); + Index = ModifyToType(Index, WideIndexVT); + SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; + SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), + N->getMemoryVT(), dl, Ops, + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; @@ -2831,7 +2965,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break; + case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break; case ISD::ANY_EXTEND: case ISD::SIGN_EXTEND: @@ -2928,6 +3064,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { } } +SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) { + // The result (and first input) is legal, but the second input is illegal. + // We can't do much to fix that, so just unroll and let the extracts off of + // the second input be widened as needed later. 
+ return DAG.UnrollVectorOp(N); +} + SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // Since the result is legal and the input is illegal, it is unlikely // that we can fix the input to a legal type so unroll the convert @@ -3070,6 +3213,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { false); } +SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "Can widen only data operand of mscatter"); + MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); + SDValue DataOp = MSC->getValue(); + SDValue Mask = MSC->getMask(); + + // Widen the value + SDValue WideVal = GetWidenedVector(DataOp); + EVT WideVT = WideVal.getValueType(); + unsigned NumElts = WideVal.getValueType().getVectorNumElements(); + SDLoc dl(N); + + // The mask should be widened as well + Mask = WidenTargetBoolean(Mask, WideVT, true); + + // Widen index + SDValue Index = MSC->getIndex(); + EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), + Index.getValueType().getScalarType(), + NumElts); + Index = ModifyToType(Index, WideIndexVT); + + SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index}; + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), + MSC->getMemoryVT(), dl, Ops, + MSC->getMemOperand()); +} + SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue InOp0 = GetWidenedVector(N->getOperand(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(1)); @@ -3533,7 +3704,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. -SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { +/// FillWithZeroes specifies that the vector should be widened with zeroes. +SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, + bool FillWithZeroes) { // Note that InOp might have been widened so it might already have // the right width or it might need be narrowed. EVT InVT = InOp.getValueType(); @@ -3550,10 +3723,11 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) { unsigned NumConcat = WidenNumElts / InNumElts; SmallVector<SDValue, 16> Ops(NumConcat); - SDValue UndefVal = DAG.getUNDEF(InVT); + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) : + DAG.getUNDEF(InVT); Ops[0] = InOp; for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = UndefVal; + Ops[i] = FillVal; return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops); } @@ -3573,8 +3747,9 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDValue UndefVal = DAG.getUNDEF(EltVT); + SDValue FillVal = FillWithZeroes ? 
DAG.getConstant(0, dl, EltVT) : + DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) - Ops[Idx] = UndefVal; + Ops[Idx] = FillVal; return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 6303422b9ae9..622e06f0da2a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -49,7 +49,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) TII = STI.getInstrInfo(); ResourcesModel.reset(TII->CreateTargetScheduleState(STI)); // This hard requirement could be relaxed, but for now - // do not let it procede. + // do not let it proceed. assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); unsigned NumRC = TRI->getNumRegClasses(); @@ -269,12 +269,12 @@ bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { } // Now see if there are no other dependencies - // to instructions alredy in the packet. + // to instructions already in the packet. for (unsigned i = 0, e = Packet.size(); i != e; ++i) for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), E = Packet[i]->Succs.end(); I != E; ++I) { // Since we do not add pseudos to packets, might as well - // ignor order deps. + // ignore order deps. if (I->isCtrl()) continue; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 34e1a7001082..62e7733ecd2b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -440,7 +440,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -519,7 +519,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { + for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index e9bd52034ffd..91024e672f9c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -141,8 +141,8 @@ private: /// that are "live". These nodes must be scheduled before any other nodes that /// modifies the registers can be scheduled. unsigned NumLiveRegs; - std::vector<SUnit*> LiveRegDefs; - std::vector<SUnit*> LiveRegGens; + std::unique_ptr<SUnit*[]> LiveRegDefs; + std::unique_ptr<SUnit*[]> LiveRegGens; // Collect interferences between physical register use/defs. // Each interference is an SUnit and set of physical registers. @@ -328,8 +328,8 @@ void ScheduleDAGRRList::Schedule() { NumLiveRegs = 0; // Allocate slots for each physical register, plus one for a special register // to track the virtual resource of a calling sequence. 
- LiveRegDefs.resize(TRI->getNumRegs() + 1, nullptr); - LiveRegGens.resize(TRI->getNumRegs() + 1, nullptr); + LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]()); + LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]()); CallSeqEndForStart.clear(); assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); @@ -1206,7 +1206,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -1218,7 +1218,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, /// CheckForLiveRegDef - Return true and update live register vector if the /// specified register def of the specified SUnit clobbers any "live" registers. static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, - std::vector<SUnit*> &LiveRegDefs, + SUnit **LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, SmallVectorImpl<unsigned> &LRegs, const TargetRegisterInfo *TRI) { @@ -1240,7 +1240,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered /// by RegMask, and add them to LRegs. static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, - std::vector<SUnit*> &LiveRegDefs, + ArrayRef<SUnit*> LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, SmallVectorImpl<unsigned> &LRegs) { // Look at all live registers. Skip Reg0 and the special CallResource. @@ -1278,7 +1278,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU) - CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs.get(), RegAdded, LRegs, TRI); } @@ -1302,7 +1302,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) - CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); + CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } } else i += NumVals; @@ -1328,13 +1328,15 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { } } if (const uint32_t *RegMask = getNodeRegMask(Node)) - CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs); + CheckForLiveRegDefMasked(SU, RegMask, + makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()), + RegAdded, LRegs); const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) - CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); + for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) + CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } return !LRegs.empty(); @@ -2718,7 +2720,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - const uint16_t *ImpDefs + const MCPhysReg *ImpDefs = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); const uint32_t 
*RegMask = getNodeRegMask(SU->getNode()); if(!ImpDefs && !RegMask) @@ -2737,7 +2739,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, return true; if (ImpDefs) - for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef) + for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef) // Return true if SU clobbers this physical register use and the // definition of the register reaches from DepSU. IsReachable queries // a topological forward sort of the DAG (following the successors). @@ -2756,13 +2758,13 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetRegisterInfo *TRI) { SDNode *N = SuccSU->getNode(); unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); + const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); assert(ImpDefs && "Caller should check hasPhysRegDefs"); for (const SDNode *SUNode = SU->getNode(); SUNode; SUNode = SUNode->getGluedNode()) { if (!SUNode->isMachineOpcode()) continue; - const uint16_t *SUImpDefs = + const MCPhysReg *SUImpDefs = TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); const uint32_t *SURegMask = getNodeRegMask(SUNode); if (!SUImpDefs && !SURegMask) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 159c28cd2a61..5cc806668b12 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -86,12 +86,6 @@ namespace llvm { /// flagged together nodes with a single SUnit. void BuildSchedGraph(AliasAnalysis *AA); - /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is - /// CopyToReg and its only active data operands are CopyFromReg within a - /// single block loop. - /// - void InitVRegCycleFlag(SUnit *SU); - /// InitNumRegDefsLeft - Determine the # of regs defined by this node. /// void InitNumRegDefsLeft(SUnit *SU); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 14f44ccc60ce..abbc48e10e46 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "SDNodeDbgValue.h" +#include "llvm/ADT/APSInt.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -210,28 +211,6 @@ bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { return true; } -/// isScalarToVector - Return true if the specified node is a -/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low -/// element is not an undef. -bool ISD::isScalarToVector(const SDNode *N) { - if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) - return true; - - if (N->getOpcode() != ISD::BUILD_VECTOR) - return false; - if (N->getOperand(0).getOpcode() == ISD::UNDEF) - return false; - unsigned NumElems = N->getNumOperands(); - if (NumElems == 1) - return false; - for (unsigned i = 1; i < NumElems; ++i) { - SDValue V = N->getOperand(i); - if (V.getOpcode() != ISD::UNDEF) - return false; - } - return true; -} - /// allOperandsUndef - Return true if the node has at least one operand /// and all operands of the specified node are ISD::UNDEF. 
bool ISD::allOperandsUndef(const SDNode *N) { @@ -397,24 +376,21 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID, ID.AddInteger(Op.getResNo()); } } + /// Add logical or fast math flag values to FoldingSetNodeID value. static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode, const SDNodeFlags *Flags) { - if (!Flags || !isBinOpWithFlags(Opcode)) + if (!isBinOpWithFlags(Opcode)) return; - unsigned RawFlags = Flags->getRawFlags(); - // If no flags are set, do not alter the ID. We must match the ID of nodes - // that were created without explicitly specifying flags. This also saves time - // and allows a gradual increase in API usage of the optional optimization - // flags. - if (RawFlags != 0) - ID.AddInteger(RawFlags); + unsigned RawFlags = 0; + if (Flags) + RawFlags = Flags->getRawFlags(); + ID.AddInteger(RawFlags); } static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) { - if (auto *Node = dyn_cast<BinaryWithFlagsSDNode>(N)) - AddNodeIDFlags(ID, Node->getOpcode(), &Node->Flags); + AddNodeIDFlags(ID, N->getOpcode(), N->getFlags()); } static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC, @@ -624,9 +600,9 @@ void SelectionDAG::RemoveDeadNodes() { SmallVector<SDNode*, 128> DeadNodes; // Add all obviously-dead nodes to the DeadNodes worklist. - for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) - if (I->use_empty()) - DeadNodes.push_back(I); + for (SDNode &Node : allnodes()) + if (Node.use_empty()) + DeadNodes.push_back(&Node); RemoveDeadNodes(DeadNodes); @@ -766,6 +742,7 @@ static void VerifySDNode(SDNode *N) { void SelectionDAG::InsertNode(SDNode *N) { AllNodes.push_back(N); #ifndef NDEBUG + N->PersistentId = NextPersistentId++; VerifySDNode(N); #endif } @@ -929,7 +906,7 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), UpdateListeners(nullptr) { - AllNodes.push_back(&EntryNode); + InsertNode(&EntryNode); DbgInfo = new SDDbgInfo(); } @@ -950,7 +927,10 @@ void SelectionDAG::allnodes_clear() { assert(&*AllNodes.begin() == &EntryNode); AllNodes.remove(AllNodes.begin()); while (!AllNodes.empty()) - DeallocateNode(AllNodes.begin()); + DeallocateNode(&AllNodes.front()); +#ifndef NDEBUG + NextPersistentId = 0; +#endif } BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, @@ -1023,7 +1003,7 @@ void SelectionDAG::clear() { static_cast<SDNode*>(nullptr)); EntryNode.UseList = nullptr; - AllNodes.push_back(&EntryNode); + InsertNode(&EntryNode); Root = getEntryNode(); DbgInfo->clear(); } @@ -1429,8 +1409,8 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, - TargetFlags); + SDNode *N = + new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1852,8 +1832,58 @@ SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout()); if (OpTy == ShTy || OpTy.isVector()) return Op; - ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? 
ISD::TRUNCATE : ISD::ZERO_EXTEND; - return getNode(Opcode, SDLoc(Op), ShTy, Op); + return getZExtOrTrunc(Op, SDLoc(Op), ShTy); +} + +SDValue SelectionDAG::expandVAArg(SDNode *Node) { + SDLoc dl(Node); + const TargetLowering &TLI = getTargetLoweringInfo(); + const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); + EVT VT = Node->getValueType(0); + SDValue Tmp1 = Node->getOperand(0); + SDValue Tmp2 = Node->getOperand(1); + unsigned Align = Node->getConstantOperandVal(3); + + SDValue VAListLoad = + getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, false, 0); + SDValue VAList = VAListLoad; + + if (Align > TLI.getMinStackArgumentAlignment()) { + assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); + + VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, + getConstant(Align - 1, dl, VAList.getValueType())); + + VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList, + getConstant(-(int64_t)Align, dl, VAList.getValueType())); + } + + // Increment the pointer, VAList, to the next vaarg + Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, + getConstant(getDataLayout().getTypeAllocSize( + VT.getTypeForEVT(*getContext())), + dl, VAList.getValueType())); + // Store the incremented VAList to the legalized pointer + Tmp1 = getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, 0); + // Load the actual argument out of the pointer VAList + return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo(), + false, false, false, 0); +} + +SDValue SelectionDAG::expandVACopy(SDNode *Node) { + SDLoc dl(Node); + const TargetLowering &TLI = getTargetLoweringInfo(); + // This defaults to loading a pointer from the input and storing it to the + // output, returning the chain. + const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); + const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); + SDValue Tmp1 = getLoad(TLI.getPointerTy(getDataLayout()), dl, + Node->getOperand(0), Node->getOperand(2), + MachinePointerInfo(VS), false, false, false, 0); + return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), + MachinePointerInfo(VD), false, false, 0); } /// CreateStackTemporary - Create a stack temporary, suitable for holding the @@ -1872,8 +1902,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { /// CreateStackTemporary - Create a stack temporary suitable for holding /// either of the specified value types. SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { - unsigned Bytes = std::max(VT1.getStoreSizeInBits(), - VT2.getStoreSizeInBits())/8; + unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize()); Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); const DataLayout &DL = getDataLayout(); @@ -2255,7 +2284,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, unsigned MemBits = VT.getScalarType().getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { - computeKnownBitsFromRangeMetadata(*Ranges, KnownZero); + if (LD->getExtensionType() == ISD::NON_EXTLOAD) + computeKnownBitsFromRangeMetadata(*Ranges, KnownZero, KnownOne); } break; } @@ -2564,6 +2594,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (Tmp == 1) return 1; // Early out. 
Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1); return std::min(Tmp, Tmp2); + case ISD::SELECT_CC: + Tmp = ComputeNumSignBits(Op.getOperand(2), Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(3), Depth+1); + return std::min(Tmp, Tmp2); case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -2679,7 +2714,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ const int rIndex = Items - 1 - cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - // If the sign portion ends in our element the substraction gives correct + // If the sign portion ends in our element the subtraction gives correct // result. Otherwise it gives either negative or > bitwidth result return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); } @@ -2798,6 +2833,16 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { return false; } +bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { + assert(A.getValueType() == B.getValueType() && + "Values must have the same type"); + APInt AZero, AOne; + APInt BZero, BOne; + computeKnownBits(A, AZero, AOne); + computeKnownBits(B, BZero, BOne); + return (AZero | BZero).isAllOnesValue(); +} + /// getNode - Gets or creates the specified node. /// SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { @@ -2848,8 +2893,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT); if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT); - else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) + if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT); + if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) + return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), @@ -2954,44 +3001,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: case ISD::CTPOP: { - EVT SVT = VT.getScalarType(); - EVT InVT = BV->getValueType(0); - EVT InSVT = InVT.getScalarType(); - - // Find legal integer scalar type for constant promotion and - // ensure that its scalar size is at least as large as source. - EVT LegalSVT = SVT; - if (SVT.isInteger()) { - LegalSVT = TLI->getTypeToTransformTo(*getContext(), SVT); - if (LegalSVT.bitsLT(SVT)) break; - } - - // Let the above scalar folding handle the folding of each element. - SmallVector<SDValue, 8> Ops; - for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { - SDValue OpN = BV->getOperand(i); - EVT OpVT = OpN.getValueType(); - - // Build vector (integer) scalar operands may need implicit - // truncation - do this before constant folding. - if (OpVT.isInteger() && OpVT.bitsGT(InSVT)) - OpN = getNode(ISD::TRUNCATE, DL, InSVT, OpN); - - OpN = getNode(Opcode, DL, SVT, OpN); - - // Legalize the (integer) scalar constant if necessary. 
- if (LegalSVT != SVT) - OpN = getNode(ISD::ANY_EXTEND, DL, LegalSVT, OpN); - - if (OpN.getOpcode() != ISD::UNDEF && - OpN.getOpcode() != ISD::Constant && - OpN.getOpcode() != ISD::ConstantFP) - break; - Ops.push_back(OpN); - } - if (Ops.size() == VT.getVectorNumElements()) - return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); - break; + SDValue Ops = { Operand }; + if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) + return Fold; } } } @@ -3012,6 +3024,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid fpext node, dst < src!"); if (Operand.getOpcode() == ISD::UNDEF) return getUNDEF(VT); break; @@ -3019,12 +3033,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid SIGN_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && - "Invalid sext node, dst < src!"); assert((!VT.isVector() || VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid sext node, dst < src!"); if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); else if (OpOpcode == ISD::UNDEF) @@ -3035,12 +3049,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid ZERO_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && - "Invalid zext node, dst < src!"); assert((!VT.isVector() || VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid zext node, dst < src!"); if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getNode()->getOperand(0)); @@ -3052,12 +3066,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid ANY_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && - "Invalid anyext node, dst < src!"); assert((!VT.isVector() || VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid anyext node, dst < src!"); if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ANY_EXTEND) @@ -3077,12 +3091,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid TRUNCATE!"); if (Operand.getValueType() == VT) return Operand; // noop truncate - assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) && - "Invalid truncate node, src < dst!"); assert((!VT.isVector() || VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsGT(VT) && + "Invalid truncate node, src < dst!"); if (OpOpcode == ISD::TRUNCATE) return 
getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || @@ -3135,8 +3149,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, case ISD::FNEG: // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) + // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), - Operand.getNode()->getOperand(0)); + Operand.getNode()->getOperand(0), + &cast<BinaryWithFlagsSDNode>(Operand.getNode())->Flags); if (OpOpcode == ISD::FNEG) // --X -> X return Operand.getNode()->getOperand(0); break; @@ -3182,6 +3198,10 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, case ISD::SRA: return std::make_pair(C1.ashr(C2), true); case ISD::ROTL: return std::make_pair(C1.rotl(C2), true); case ISD::ROTR: return std::make_pair(C1.rotr(C2), true); + case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true); + case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true); + case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true); + case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true); case ISD::UDIV: if (!C2.getBoolValue()) break; @@ -3284,10 +3304,118 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs); } +SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, + EVT VT, + ArrayRef<SDValue> Ops, + const SDNodeFlags *Flags) { + // If the opcode is a target-specific ISD node, there's nothing we can + // do here and the operand rules may not line up with the below, so + // bail early. + if (Opcode >= ISD::BUILTIN_OP_END) + return SDValue(); + + // We can only fold vectors - maybe merge with FoldConstantArithmetic someday? + if (!VT.isVector()) + return SDValue(); + + unsigned NumElts = VT.getVectorNumElements(); + + auto IsScalarOrSameVectorSize = [&](const SDValue &Op) { + return !Op.getValueType().isVector() || + Op.getValueType().getVectorNumElements() == NumElts; + }; + + auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) { + BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op); + return (Op.getOpcode() == ISD::UNDEF) || + (Op.getOpcode() == ISD::CONDCODE) || (BV && BV->isConstant()); + }; + + // All operands must be vector types with the same number of elements as + // the result type and must be either UNDEF or a build vector of constant + // or UNDEF scalars. + if (!std::all_of(Ops.begin(), Ops.end(), IsConstantBuildVectorOrUndef) || + !std::all_of(Ops.begin(), Ops.end(), IsScalarOrSameVectorSize)) + return SDValue(); + + // If we are comparing vectors, then the result needs to be a i1 boolean + // that is then sign-extended back to the legal result type. + EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType()); + + // Find legal integer scalar type for constant promotion and + // ensure that its scalar size is at least as large as source. + EVT LegalSVT = VT.getScalarType(); + if (LegalSVT.isInteger()) { + LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); + if (LegalSVT.bitsLT(SVT)) + return SDValue(); + } + + // Constant fold each scalar lane separately. 
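The loop that follows folds a constant vector operation one lane at a time: take lane i of every constant (or UNDEF) operand, fold the scalar operation, and rebuild a constant vector from the scalar results. A small self-contained illustration of that lane-wise shape, using plain integers instead of SDValues (the values are made up):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

int main() {
  std::vector<int32_t> A = {1, 2, 3, 4};
  std::vector<int32_t> B = {10, 20, 30, 40};
  std::vector<int32_t> R(A.size());
  // One scalar fold per lane; the results become the new constant vector.
  for (std::size_t i = 0; i != A.size(); ++i)
    R[i] = A[i] + B[i];
  assert(R[0] == 11 && R[3] == 44);
  return 0;
}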
+ SmallVector<SDValue, 4> ScalarResults; + for (unsigned i = 0; i != NumElts; i++) { + SmallVector<SDValue, 4> ScalarOps; + for (SDValue Op : Ops) { + EVT InSVT = Op.getValueType().getScalarType(); + BuildVectorSDNode *InBV = dyn_cast<BuildVectorSDNode>(Op); + if (!InBV) { + // We've checked that this is UNDEF or a constant of some kind. + if (Op.isUndef()) + ScalarOps.push_back(getUNDEF(InSVT)); + else + ScalarOps.push_back(Op); + continue; + } + + SDValue ScalarOp = InBV->getOperand(i); + EVT ScalarVT = ScalarOp.getValueType(); + + // Build vector (integer) scalar operands may need implicit + // truncation - do this before constant folding. + if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)) + ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp); + + ScalarOps.push_back(ScalarOp); + } + + // Constant fold the scalar operands. + SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags); + + // Legalize the (integer) scalar constant if necessary. + if (LegalSVT != SVT) + ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); + + // Scalar folding only succeeded if the result is a constant or UNDEF. + if (ScalarResult.getOpcode() != ISD::UNDEF && + ScalarResult.getOpcode() != ISD::Constant && + ScalarResult.getOpcode() != ISD::ConstantFP) + return SDValue(); + ScalarResults.push_back(ScalarResult); + } + + assert(ScalarResults.size() == NumElts && + "Unexpected number of scalar results for BUILD_VECTOR"); + return getNode(ISD::BUILD_VECTOR, DL, VT, ScalarResults); +} + SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags *Flags) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); + + // Canonicalize constant to RHS if commutative. 
+ if (isCommutativeBinOp(Opcode)) { + if (N1C && !N2C) { + std::swap(N1C, N2C); + std::swap(N1, N2); + } else if (N1CFP && !N2CFP) { + std::swap(N1CFP, N2CFP); + std::swap(N1, N2); + } + } + switch (Opcode) { default: break; case ISD::TokenFactor: @@ -3356,6 +3484,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::MUL: case ISD::SDIV: case ISD::SREM: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: assert(VT.isInteger() && "This operator does not apply to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); @@ -3367,37 +3499,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::FREM: if (getTarget().Options.UnsafeFPMath) { if (Opcode == ISD::FADD) { - // 0+x --> x - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) - if (CFP->getValueAPF().isZero()) - return N2; // x+0 --> x - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2)) - if (CFP->getValueAPF().isZero()) - return N1; + if (N2CFP && N2CFP->getValueAPF().isZero()) + return N1; } else if (Opcode == ISD::FSUB) { // x-0 --> x - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2)) - if (CFP->getValueAPF().isZero()) - return N1; + if (N2CFP && N2CFP->getValueAPF().isZero()) + return N1; } else if (Opcode == ISD::FMUL) { - ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1); - SDValue V = N2; - - // If the first operand isn't the constant, try the second - if (!CFP) { - CFP = dyn_cast<ConstantFPSDNode>(N2); - V = N1; - } - - if (CFP) { - // 0*x --> 0 - if (CFP->isZero()) - return SDValue(CFP,0); - // 1*x --> x - if (CFP->isExactlyValue(1.0)) - return V; - } + // x*0 --> 0 + if (N2CFP && N2CFP->isZero()) + return N2; + // x*1 --> x + if (N2CFP && N2CFP->isExactlyValue(1.0)) + return N1; } } assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); @@ -3457,7 +3572,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() && VT.bitsLE(N1.getValueType()) && - isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!"); + N2C && "Invalid FP_ROUND!"); if (N1.getValueType() == VT) return N1; // noop conversion. break; case ISD::AssertSext: @@ -3502,13 +3617,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SmallVector<SDValue, 8> Ops; for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { SDValue Op = N1.getOperand(i); - if (Op.getValueType() != VT.getScalarType()) break; if (Op.getOpcode() == ISD::UNDEF) { - Ops.push_back(Op); + Ops.push_back(getUNDEF(VT.getScalarType())); continue; } if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { APInt Val = C->getAPIntValue(); + Val = Val.zextOrTrunc(VT.getScalarSizeInBits()); Ops.push_back(SignExtendInReg(Val)); continue; } @@ -3590,15 +3705,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, return N1.getOperand(N2C->getZExtValue()); // EXTRACT_ELEMENT of a constant int is also very common. 
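Folding EXTRACT_ELEMENT of an integer constant amounts to a right shift by ElementSize * Index followed by a truncate to the element width. A standalone arithmetic check of that identity (the constant value is arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t C = 0x1122334455667788ULL;
  // Element 0 is the low half, element 1 the high half: lshr, then trunc.
  uint32_t Elt0 = static_cast<uint32_t>(C >> (32 * 0));
  uint32_t Elt1 = static_cast<uint32_t>(C >> (32 * 1));
  assert(Elt0 == 0x55667788u && Elt1 == 0x11223344u);
  return 0;
}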
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + if (N1C) { unsigned ElementSize = VT.getSizeInBits(); unsigned Shift = ElementSize * N2C->getZExtValue(); - APInt ShiftedVal = C->getAPIntValue().lshr(Shift); + APInt ShiftedVal = N1C->getAPIntValue().lshr(Shift); return getConstant(ShiftedVal.trunc(ElementSize), DL, VT); } break; - case ISD::EXTRACT_SUBVECTOR: { - SDValue Index = N2; + case ISD::EXTRACT_SUBVECTOR: if (VT.isSimple() && N1.getValueType().isSimple()) { assert(VT.isVector() && N1.getValueType().isVector() && "Extract subvector VTs must be a vectors!"); @@ -3608,9 +3722,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, assert(VT.getSimpleVT() <= N1.getSimpleValueType() && "Extract subvector must be from larger vector to smaller vector!"); - if (isa<ConstantSDNode>(Index)) { - assert((VT.getVectorNumElements() + - cast<ConstantSDNode>(Index)->getZExtValue() + if (N2C) { + assert((VT.getVectorNumElements() + N2C->getZExtValue() <= N1.getValueType().getVectorNumElements()) && "Extract subvector overflow!"); } @@ -3621,29 +3734,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } break; } - } // Perform trivial constant folding. if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode())) return SV; - // Canonicalize constant to RHS if commutative. - if (N1C && !N2C && isCommutativeBinOp(Opcode)) { - std::swap(N1C, N2C); - std::swap(N1, N2); - } - // Constant fold FP operations. bool HasFPExceptions = TLI->hasFloatingPointExceptions(); - ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); if (N1CFP) { - if (!N2CFP && isCommutativeBinOp(Opcode)) { - // Canonicalize constant to RHS if commutative. - std::swap(N1CFP, N2CFP); - std::swap(N1, N2); - } else if (N2CFP) { + if (N2CFP) { APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF(); APFloat::opStatus s; switch (Opcode) { @@ -3670,7 +3770,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } break; case ISD::FREM : - s = V1.mod(V2, APFloat::rmNearestTiesToEven); + s = V1.mod(V2); if (!HasFPExceptions || (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)) { return getConstantFP(V1, DL, VT); @@ -3795,7 +3895,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); switch (Opcode) { case ISD::FMA: { ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); @@ -3827,12 +3926,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, break; case ISD::SETCC: { // Use FoldSetCC to simplify SETCC's. - SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL); - if (Simp.getNode()) return Simp; + if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL)) + return V; + // Vector constant folding. 
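For a vector SETCC whose operands are all constant, each lane is compared as a scalar to an i1 and the i1 is then sign-extended to the result's element type, so true lanes come back as all-ones. A plain-integer sketch of the lane values such a fold produces (the operand vectors are illustrative):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

int main() {
  std::vector<int32_t> A = {1, 5, -3, 7};
  std::vector<int32_t> B = {2, 5, -4, 9};
  std::vector<int32_t> R(A.size());
  // setlt per lane; the i1 result sign-extends to -1 (true) or 0 (false).
  for (std::size_t i = 0; i != A.size(); ++i)
    R[i] = (A[i] < B[i]) ? -1 : 0;
  assert(R[0] == -1 && R[1] == 0 && R[2] == 0 && R[3] == -1);
  return 0;
}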
+ SDValue Ops[] = {N1, N2, N3}; + if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) + return V; break; } case ISD::SELECT: - if (N1C) { + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) { if (N1C->getZExtValue()) return N2; // select true, X, Y -> X return N3; // select false, X, Y -> Y @@ -4153,6 +4256,14 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, return true; } +static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { + // On Darwin, -Os means optimize for size without hurting performance, so + // only really optimize for size when -Oz (MinSize) is used. + if (MF.getTarget().getTargetTriple().isOSDarwin()) + return MF.getFunction()->optForMinSize(); + return MF.getFunction()->optForSize(); +} + static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, @@ -4173,7 +4284,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4286,7 +4397,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4380,7 +4491,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4446,6 +4557,16 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } +static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, + unsigned AS) { + // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all + // pointer operands can be losslessly bitcasted to pointers of address space 0 + if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) { + report_fatal_error("cannot lower memory intrinsic in address space " + + Twine(AS)); + } +} + SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, @@ -4487,6 +4608,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, true, DstPtrInfo, SrcPtrInfo); } + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); + // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc // memcpy is not guaranteed to be safe. 
libc memcpys aren't required to // respect volatile, so they may do things like read or write memory @@ -4548,6 +4672,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, return Result; } + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); + // FIXME: If the memmove is volatile, lowering it to plain libc memmove may // not be safe. See memcpy above for more details. @@ -4605,6 +4732,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, return Result; } + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + // Emit a library call. Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; @@ -4872,10 +5001,12 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, /// MachinePointerInfo record from it. This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". -static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) { +static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, + int64_t Offset = 0) { // If this is FI+Offset, we can model it. if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) - return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset); + return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), + FI->getIndex(), Offset); // If this is (FI+Offset1)+Offset2, we can model it. if (Ptr.getOpcode() != ISD::ADD || @@ -4884,20 +5015,22 @@ static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) { return MachinePointerInfo(); int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); - return MachinePointerInfo::getFixedStack(FI, Offset+ - cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue()); + return MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI, + Offset + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue()); } /// InferPointerInfo - If the specified ptr/offset is a frame index, infer a /// MachinePointerInfo record from it. This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". -static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) { +static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, + SDValue OffsetOp) { // If the 'Offset' value isn't a constant, we can't handle this. if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp)) - return InferPointerInfo(Ptr, OffsetNode->getSExtValue()); + return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue()); if (OffsetOp.getOpcode() == ISD::UNDEF) - return InferPointerInfo(Ptr); + return InferPointerInfo(DAG, Ptr); return MachinePointerInfo(); } @@ -4926,7 +5059,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, // If we don't have a PtrInfo, infer the trivial frame index case to simplify // clients. 
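InferPointerInfo above only recognizes two pointer shapes: a bare frame index, or a frame index plus a constant offset; everything else yields an empty MachinePointerInfo. A rough standalone sketch of that pattern match, using a hypothetical Node struct rather than the real SDValue / FrameIndexSDNode types:

#include <cstdint>

// Hypothetical, simplified stand-in for the node shapes involved; purely an
// illustration of the two patterns recognized, not the SelectionDAG types.
struct Node {
  enum Kind { FrameIndex, Constant, Add, Other } kind;
  int frameIndex = 0;                         // valid when kind == FrameIndex
  int64_t value = 0;                          // valid when kind == Constant
  const Node *lhs = nullptr, *rhs = nullptr;  // valid when kind == Add
};

// Fills FI and adds any matched constant to the caller's base Offset for the
// shapes "FI" and "FI + constant"; returns false for anything else.
bool inferFrameIndex(const Node &Ptr, int &FI, int64_t &Offset) {
  if (Ptr.kind == Node::FrameIndex) {
    FI = Ptr.frameIndex;
    return true;
  }
  if (Ptr.kind == Node::Add && Ptr.lhs && Ptr.rhs &&
      Ptr.lhs->kind == Node::FrameIndex && Ptr.rhs->kind == Node::Constant) {
    FI = Ptr.lhs->frameIndex;
    Offset += Ptr.rhs->value;
    return true;
  }
  return false;
}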
if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(Ptr, Offset); + PtrInfo = InferPointerInfo(*this, Ptr, Offset); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = @@ -5054,7 +5187,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, Flags |= MachineMemOperand::MONonTemporal; if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(Ptr); + PtrInfo = InferPointerInfo(*this, Ptr); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = @@ -5109,7 +5242,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, Flags |= MachineMemOperand::MONonTemporal; if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(Ptr); + PtrInfo = InferPointerInfo(*this, Ptr); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = @@ -5261,7 +5394,7 @@ SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl, cast<MaskedGatherSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - MaskedGatherSDNode *N = + MaskedGatherSDNode *N = new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(), Ops, VTs, VT, MMO); CSEMap.InsertNode(N, IP); @@ -5317,12 +5450,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, - ArrayRef<SDValue> Ops) { + ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) { unsigned NumOps = Ops.size(); switch (NumOps) { case 0: return getNode(Opcode, DL, VT); case 1: return getNode(Opcode, DL, VT, Ops[0]); - case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); + case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags); case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); default: break; } @@ -5656,7 +5789,7 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { "Update with wrong number of operands"); // If no operands changed just return the input node. - if (Ops.empty() || std::equal(Ops.begin(), Ops.end(), N->op_begin())) + if (std::equal(Ops.begin(), Ops.end(), N->op_begin())) return N; // See if the modified node already exists. @@ -6451,13 +6584,13 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // Node Id fields for nodes At SortedPos and after will contain the // count of outstanding operands. for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { - SDNode *N = I++; + SDNode *N = &*I++; checkForCycles(N, this); unsigned Degree = N->getNumOperands(); if (Degree == 0) { // A node with no uses, add it to the result array immediately. N->setNodeId(DAGSize++); - allnodes_iterator Q = N; + allnodes_iterator Q(N); if (Q != SortedPos) SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q)); assert(SortedPos != AllNodes.end() && "Overran node list"); @@ -6470,8 +6603,8 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // Visit all the nodes. As we iterate, move nodes into sorted order, // such that by the time the end is reached all nodes will be sorted. - for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) { - SDNode *N = I; + for (SDNode &Node : allnodes()) { + SDNode *N = &Node; checkForCycles(N, this); // N is in sorted position, so all its uses have one less operand // that needs to be sorted. 
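The numbering loop here is a Kahn-style topological sort driven by outstanding-operand counts: a node gets its id once all of its operands are numbered, and numbering it lets each of its users drop one outstanding operand. A minimal standalone sketch of the same idea on a plain index-based graph (an illustration only, not the SDNode list manipulation used in AssignTopologicalOrder):

#include <queue>
#include <vector>

// uses[n] lists the nodes that consume n's result; numOperands[n] is how many
// operands n has. Returns one topological id per node, operands before users.
std::vector<int> topologicalIds(const std::vector<std::vector<int>> &uses,
                                const std::vector<int> &numOperands) {
  std::vector<int> pending(numOperands);   // operands not yet numbered
  std::vector<int> id(uses.size(), -1);
  std::queue<int> ready;
  for (int n = 0; n != static_cast<int>(uses.size()); ++n)
    if (pending[n] == 0)
      ready.push(n);                       // operand-less nodes go first
  int nextId = 0;
  while (!ready.empty()) {
    int n = ready.front(); ready.pop();
    id[n] = nextId++;                      // all of n's operands are numbered
    for (int user : uses[n])
      if (--pending[user] == 0)            // user's last operand just got an id
        ready.push(user);
  }
  return id;                               // a remaining -1 would indicate a cycle
}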
@@ -6493,9 +6626,10 @@ unsigned SelectionDAG::AssignTopologicalOrder() { P->setNodeId(Degree); } } - if (I == SortedPos) { + if (&Node == SortedPos) { #ifndef NDEBUG - SDNode *S = ++I; + allnodes_iterator I(N); + SDNode *S = &*++I; dbgs() << "Overran sorted position:\n"; S->dumprFull(this); dbgs() << "\n"; dbgs() << "Checking if this is due to cycles\n"; @@ -6559,6 +6693,26 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { // SDNode Class //===----------------------------------------------------------------------===// +bool llvm::isNullConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isNullValue(); +} + +bool llvm::isNullFPConstant(SDValue V) { + ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); + return Const != nullptr && Const->isZero() && !Const->isNegative(); +} + +bool llvm::isAllOnesConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isAllOnesValue(); +} + +bool llvm::isOneConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isOne(); +} + HandleSDNode::~HandleSDNode() { DropOperands(); } @@ -6772,6 +6926,12 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const { return cast<ConstantSDNode>(OperandList[Num])->getZExtValue(); } +const SDNodeFlags *SDNode::getFlags() const { + if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this)) + return &FlagsNode->Flags; + return nullptr; +} + SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { assert(N->getNumValues() == 1 && "Can't unroll a vector with multiple results!"); @@ -6808,9 +6968,11 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { } switch (N->getOpcode()) { - default: - Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands)); + default: { + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands, + N->getFlags())); break; + } case ISD::VSELECT: Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands)); break; @@ -7101,6 +7263,24 @@ BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const { return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements)); } +int32_t +BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, + uint32_t BitWidth) const { + if (ConstantFPSDNode *CN = + dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) { + bool IsExact; + APSInt IntVal(BitWidth); + APFloat APF = CN->getValueAPF(); + if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) != + APFloat::opOK || + !IsExact) + return -1; + + return IntVal.exactLogBase2(); + } + return -1; +} + bool BuildVectorSDNode::isConstant() const { for (const SDValue &Op : op_values()) { unsigned Opc = Op.getOpcode(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2c3c0eb101a0..d2ea85ab4d22 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -63,6 +64,7 @@ #include "llvm/Target/TargetSelectionDAGInfo.h" #include 
"llvm/Target/TargetSubtargetInfo.h" #include <algorithm> +#include <utility> using namespace llvm; #define DEBUG_TYPE "isel" @@ -79,7 +81,7 @@ LimitFPPrecision("limit-float-precision", cl::init(0)); static cl::opt<bool> -EnableFMFInDAG("enable-fmf-dag", cl::init(false), cl::Hidden, +EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden, cl::desc("Enable fast-math-flags for DAG nodes")); // Limit the width of DAG chains. This is important in general to prevent @@ -196,6 +198,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, if (PartEVT == ValueVT) return Val; + if (PartEVT.isInteger() && ValueVT.isFloatingPoint() && + ValueVT.bitsLT(PartEVT)) { + // For an FP value in an integer part, we need to truncate to the right + // width first. + PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val); + } + if (PartEVT.isInteger() && ValueVT.isInteger()) { if (ValueVT.bitsLT(PartEVT)) { // For a truncate, see if we have any information to @@ -319,9 +329,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && "Cannot handle this kind of promotion"); // Promoted vector extract - bool Smaller = ValueVT.bitsLE(PartEVT); - return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, ValueVT, Val); + return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); } @@ -339,11 +347,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, } if (ValueVT.getVectorNumElements() == 1 && - ValueVT.getVectorElementType() != PartEVT) { - bool Smaller = ValueVT.bitsLE(PartEVT); - Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, ValueVT.getScalarType(), Val); - } + ValueVT.getVectorElementType() != PartEVT) + Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType()); return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } @@ -387,6 +392,12 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { + if (ValueVT.isFloatingPoint()) { + // FP values need to be bitcast, then extended if they are being put + // into a larger container. + ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + } assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && ValueVT.isInteger() && "Unknown mismatch!"); @@ -520,9 +531,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { // Promoted vector extract - bool Smaller = PartEVT.bitsLE(ValueVT); - Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, PartVT, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } else{ // Vector -> scalar conversion. assert(ValueVT.getVectorNumElements() == 1 && @@ -531,9 +540,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); - bool Smaller = ValueVT.bitsLE(PartVT); - Val = DAG.getNode((Smaller ? 
ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, PartVT, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } Parts[0] = Val; @@ -595,8 +602,7 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; + for (EVT ValueVT : ValueVTs) { unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT); MVT RegisterVT = TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) @@ -907,7 +913,8 @@ void SelectionDAGBuilder::visit(const Instruction &I) { visit(I.getOpcode(), I); - if (!isa<TerminatorInst>(&I) && !HasTailCall) + if (!isa<TerminatorInst>(&I) && !HasTailCall && + !isStatepoint(&I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); CurInst = nullptr; @@ -943,14 +950,12 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, assert(Variable->isValidLocationForIntrinsic(dl) && "Expected inlined-at fields to agree"); uint64_t Offset = DI->getOffset(); - // A dbg.value for an alloca is always indirect. - bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, IsIndirect, + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false, Val)) { SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), - IsIndirect, Offset, dl, DbgSDNodeOrder); + false, Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else @@ -1168,6 +1173,135 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { llvm_unreachable("Can't get register for value!"); } +void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { + auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX; + bool IsCoreCLR = Pers == EHPersonality::CoreCLR; + MachineBasicBlock *CatchPadMBB = FuncInfo.MBB; + // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues. + if (IsMSVCCXX || IsCoreCLR) + CatchPadMBB->setIsEHFuncletEntry(); + + DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot())); +} + +void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { + // Update machine-CFG edge. + MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()]; + FuncInfo.MBB->addSuccessor(TargetMBB); + + auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + bool IsSEH = isAsynchronousEHPersonality(Pers); + if (IsSEH) { + // If this is not a fall-through branch or optimizations are switched off, + // emit the branch. + if (TargetMBB != NextBlock(FuncInfo.MBB) || + TM.getOptLevel() == CodeGenOpt::None) + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(TargetMBB))); + return; + } + + // Figure out the funclet membership for the catchret's successor. + // This will be used by the FuncletLayout pass to determine how to order the + // BB's. + WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); + const BasicBlock *SuccessorColor = EHInfo->CatchRetSuccessorColorMap[&I]; + assert(SuccessorColor && "No parent funclet for catchret!"); + MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor]; + assert(SuccessorColorMBB && "No MBB for SuccessorColor!"); + + // Create the terminator node. 
+ SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(TargetMBB), + DAG.getBasicBlock(SuccessorColorMBB)); + DAG.setRoot(Ret); +} + +void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { + // Don't emit any special code for the cleanuppad instruction. It just marks + // the start of a funclet. + FuncInfo.MBB->setIsEHFuncletEntry(); + FuncInfo.MBB->setIsCleanupFuncletEntry(); +} + +/// When an invoke or a cleanupret unwinds to the next EH pad, there are +/// many places it could ultimately go. In the IR, we have a single unwind +/// destination, but in the machine CFG, we enumerate all the possible blocks. +/// This function skips over imaginary basic blocks that hold catchswitch +/// instructions, and finds all the "real" machine +/// basic block destinations. As those destinations may not be successors of +/// EHPadBB, here we also calculate the edge probability to those destinations. +/// The passed-in Prob is the edge probability to EHPadBB. +static void findUnwindDestinations( + FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB, + BranchProbability Prob, + SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>> + &UnwindDests) { + EHPersonality Personality = + classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; + bool IsCoreCLR = Personality == EHPersonality::CoreCLR; + + while (EHPadBB) { + const Instruction *Pad = EHPadBB->getFirstNonPHI(); + BasicBlock *NewEHPadBB = nullptr; + if (isa<LandingPadInst>(Pad)) { + // Stop on landingpads. They are not funclets. + UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); + break; + } else if (isa<CleanupPadInst>(Pad)) { + // Stop on cleanup pads. Cleanups are always funclet entries for all known + // personalities. + UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); + UnwindDests.back().first->setIsEHFuncletEntry(); + break; + } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { + // Add the catchpad handlers to the possible destinations. + for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { + UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob); + // For MSVC++ and the CLR, catchblocks are funclets and need prologues. + if (IsMSVCCXX || IsCoreCLR) + UnwindDests.back().first->setIsEHFuncletEntry(); + } + NewEHPadBB = CatchSwitch->getUnwindDest(); + } else { + continue; + } + + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (BPI && NewEHPadBB) + Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB); + EHPadBB = NewEHPadBB; + } +} + +void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) { + // Update successor info. + SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; + auto UnwindDest = I.getUnwindDest(); + BranchProbabilityInfo *BPI = FuncInfo.BPI; + BranchProbability UnwindDestProb = + (BPI && UnwindDest) + ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest) + : BranchProbability::getZero(); + findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests); + for (auto &UnwindDest : UnwindDests) { + UnwindDest.first->setIsEHPad(); + addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second); + } + FuncInfo.MBB->normalizeSuccProbs(); + + // Create the terminator node. 
+ SDValue Ret = + DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot()); + DAG.setRoot(Ret); +} + +void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) { + report_fatal_error("visitCatchSwitch not yet implemented!"); +} + void SelectionDAGBuilder::visitRet(const ReturnInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto &DL = DAG.getDataLayout(); @@ -1186,7 +1320,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); - SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); + SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + DemoteReg, PtrValueVTs[0]); SDValue RetOp = getValue(I.getOperand(0)); SmallVector<EVT, 4> ValueVTs; @@ -1334,25 +1469,34 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, } /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. -uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const { +BranchProbability +SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { BranchProbabilityInfo *BPI = FuncInfo.BPI; - if (!BPI) - return 0; const BasicBlock *SrcBB = Src->getBasicBlock(); const BasicBlock *DstBB = Dst->getBasicBlock(); - return BPI->getEdgeWeight(SrcBB, DstBB); + if (!BPI) { + // If BPI is not available, set the default probability as 1 / N, where N is + // the number of successors. + auto SuccSize = std::max<uint32_t>( + std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1); + return BranchProbability(1, SuccSize); + } + return BPI->getEdgeProbability(SrcBB, DstBB); } -void SelectionDAGBuilder:: -addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, - uint32_t Weight /* = 0 */) { - if (!Weight) - Weight = getEdgeWeight(Src, Dst); - Src->addSuccessor(Dst, Weight); +void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src, + MachineBasicBlock *Dst, + BranchProbability Prob) { + if (!FuncInfo.BPI) + Src->addSuccessorWithoutProb(Dst); + else { + if (Prob.isUnknown()) + Prob = getEdgeProbability(Src, Dst); + Src->addSuccessor(Dst, Prob); + } } - static bool InBlock(const Value *V, const BasicBlock *BB) { if (const Instruction *I = dyn_cast<Instruction>(V)) return I->getParent() == BB; @@ -1369,8 +1513,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - uint32_t TWeight, - uint32_t FWeight) { + BranchProbability TProb, + BranchProbability FProb) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into @@ -1385,17 +1529,15 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, ISD::CondCode Condition; if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { Condition = getICmpCondCode(IC->getPredicate()); - } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { + } else { + const FCmpInst *FC = cast<FCmpInst>(Cond); Condition = getFCmpCondCode(FC->getPredicate()); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); - } else { - (void)Condition; // silence warning. 
- llvm_unreachable("Unknown compare instruction"); } CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, - TBB, FBB, CurBB, TWeight, FWeight); + TBB, FBB, CurBB, TProb, FProb); SwitchCases.push_back(CB); return; } @@ -1403,26 +1545,19 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), - nullptr, TBB, FBB, CurBB, TWeight, FWeight); + nullptr, TBB, FBB, CurBB, TProb, FProb); SwitchCases.push_back(CB); } -/// Scale down both weights to fit into uint32_t. -static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { - uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; - uint32_t Scale = (NewMax / UINT32_MAX) + 1; - NewTrue = NewTrue / Scale; - NewFalse = NewFalse / Scale; -} - /// FindMergedConditions - If Cond is an expression like void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - unsigned Opc, uint32_t TWeight, - uint32_t FWeight) { + Instruction::BinaryOps Opc, + BranchProbability TProb, + BranchProbability FProb) { // If this node is not part of the or/and tree, emit it as a branch. const Instruction *BOp = dyn_cast<Instruction>(Cond); if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || @@ -1431,12 +1566,12 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, - TWeight, FWeight); + TProb, FProb); return; } // Create TmpBB after CurBB. - MachineFunction::iterator BBI = CurBB; + MachineFunction::iterator BBI(CurBB); MachineFunction &MF = DAG.getMachineFunction(); MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); CurBB->getParent()->insert(++BBI, TmpBB); @@ -1455,26 +1590,25 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // The requirement is that // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) // = TrueProb for original BB. - // Assuming the original weights are A and B, one choice is to set BB1's - // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice - // assumes that + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to + // A/(1+B) and 2B/(1+B). This choice assumes that // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. // Another choice is to assume TrueProb for BB1 equals to TrueProb for // TmpBB, but the math is more complicated. - uint64_t NewTrueWeight = TWeight; - uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight; - ScaleWeights(NewTrueWeight, NewFalseWeight); + auto NewTrueProb = TProb / 2; + auto NewFalseProb = TProb / 2 + FProb; // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, - NewTrueWeight, NewFalseWeight); + NewTrueProb, NewFalseProb); - NewTrueWeight = TWeight; - NewFalseWeight = 2 * (uint64_t)FWeight; - ScaleWeights(NewTrueWeight, NewFalseWeight); + // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). + SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. 
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - NewTrueWeight, NewFalseWeight); + Probs[0], Probs[1]); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: @@ -1491,24 +1625,23 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // The requirement is that // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) // = FalseProb for original BB. - // Assuming the original weights are A and B, one choice is to set BB1's - // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice - // assumes that - // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. - - uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight; - uint64_t NewFalseWeight = FWeight; - ScaleWeights(NewTrueWeight, NewFalseWeight); + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to + // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 == + // TrueProb for BB1 * FalseProb for TmpBB. + + auto NewTrueProb = TProb + FProb / 2; + auto NewFalseProb = FProb / 2; // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, - NewTrueWeight, NewFalseWeight); + NewTrueProb, NewFalseProb); - NewTrueWeight = 2 * (uint64_t)TWeight; - NewFalseWeight = FWeight; - ScaleWeights(NewTrueWeight, NewFalseWeight); + // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). + SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - NewTrueWeight, NewFalseWeight); + Probs[0], Probs[1]); } } @@ -1585,12 +1718,14 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { - if (!DAG.getTargetLoweringInfo().isJumpExpensive() && - BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || - BOp->getOpcode() == Instruction::Or)) { + Instruction::BinaryOps Opcode = BOp->getOpcode(); + if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && + !I.getMetadata(LLVMContext::MD_unpredictable) && + (Opcode == Instruction::And || Opcode == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, - BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB), - getEdgeWeight(BrMBB, Succ1MBB)); + Opcode, + getEdgeProbability(BrMBB, Succ0MBB), + getEdgeProbability(BrMBB, Succ1MBB)); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. @@ -1669,11 +1804,12 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } // Update successor info - addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight); + addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb); // TrueBB and FalseBB are always different unless the incoming IR is // degenerate. This only happens when running llc on weird IR. if (CB.TrueBB != CB.FalseBB) - addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); + addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb); + SwitchBB->normalizeSuccProbs(); // If the lhs block is the next block, invert the condition so that we can // fall through to the lhs instead of the rhs block. 
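With A and B the incoming true/false probabilities of one branch (so A + B = 1), the 'or' split above gives BB1 the pair A/2 and A/2 + B and gives TmpBB the normalized pair {A/2, B}, which works out to A/(1+B) and 2B/(1+B); chaining them preserves the original probability of reaching TBB. A quick standalone check of that arithmetic with plain doubles (illustration only, the real code uses BranchProbability):

#include <cassert>
#include <cmath>

int main() {
  double A = 0.7, B = 0.3;                       // any pair with A + B == 1
  // BB1's outgoing probabilities still sum to one.
  assert(std::fabs(A / 2 + (A / 2 + B) - 1.0) < 1e-12);
  // Normalizing {A/2, B} yields A/(1+B) and 2B/(1+B), as the comment states.
  double sum = A / 2 + B;
  assert(std::fabs(A / 2 / sum - A / (1 + B)) < 1e-12);
  assert(std::fabs(B / sum - 2 * B / (1 + B)) < 1e-12);
  // TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB) == original TrueProb.
  assert(std::fabs(A / 2 + (A / 2 + B) * (A / (1 + B)) - A) < 1e-12);
  return 0;
}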
@@ -1797,10 +1933,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, GuardPtr, MachinePointerInfo(IRGuard, 0), true, false, false, Align); - SDValue StackSlot = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), - StackSlotPtr, - MachinePointerInfo::getFixedStack(FI), - true, false, false, Align); + SDValue StackSlot = DAG.getLoad( + PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true, + false, false, Align); // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); @@ -1837,7 +1973,7 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, - nullptr, 0, false, getCurSDLoc(), false, false).second; + None, false, getCurSDLoc(), false, false).second; DAG.setRoot(Chain); } @@ -1884,8 +2020,9 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - addSuccessorWithWeight(SwitchBB, B.Default); - addSuccessorWithWeight(SwitchBB, MBB); + addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); + addSuccessorWithProb(SwitchBB, MBB, B.Prob); + SwitchBB->normalizeSuccProbs(); SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, RangeCmp, @@ -1902,7 +2039,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, /// visitBitTestCase - this function produces one "bit test" void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, - uint32_t BranchWeightToNext, + BranchProbability BranchProbToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { @@ -1938,10 +2075,14 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE); } - // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. - addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight); - // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. - addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); + // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb. + addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb); + // The branch probability from SwitchBB to NextMBB is BranchProbToNext. + addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext); + // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is + // one as they are relative probabilities (and thus work more like weights), + // and hence we need to normalize them to let the sum of them become one. + SwitchBB->normalizeSuccProbs(); SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), @@ -1958,9 +2099,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { MachineBasicBlock *InvokeMBB = FuncInfo.MBB; - // Retrieve successors. + // Retrieve successors. Look through artificial IR level blocks like + // catchswitch for successors. 
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; - MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; + const BasicBlock *EHPadBB = I.getSuccessor(1); const Value *Callee(I.getCalledValue()); const Function *Fn = dyn_cast<Function>(Callee); @@ -1975,14 +2117,14 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { break; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: - visitPatchpoint(&I, LandingPad); + visitPatchpoint(&I, EHPadBB); break; case Intrinsic::experimental_gc_statepoint: - LowerStatepoint(ImmutableStatepoint(&I), LandingPad); + LowerStatepoint(ImmutableStatepoint(&I), EHPadBB); break; } } else - LowerCallTo(&I, getValue(Callee), false, LandingPad); + LowerCallTo(&I, getValue(Callee), false, EHPadBB); // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. @@ -1992,9 +2134,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { CopyToExportRegsIfNeeded(&I); } - // Update successor info - addSuccessorWithWeight(InvokeMBB, Return); - addSuccessorWithWeight(InvokeMBB, LandingPad); + SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; + BranchProbabilityInfo *BPI = FuncInfo.BPI; + BranchProbability EHPadBBProb = + BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB) + : BranchProbability::getZero(); + findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests); + + // Update successor info. + addSuccessorWithProb(InvokeMBB, Return); + for (auto &UnwindDest : UnwindDests) { + UnwindDest.first->setIsEHPad(); + addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second); + } + InvokeMBB->normalizeSuccProbs(); // Drop into normal successor. DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), @@ -2007,7 +2160,7 @@ void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { } void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { - assert(FuncInfo.MBB->isLandingPad() && + assert(FuncInfo.MBB->isEHPad() && "Call to landingpad not in landing pad!"); MachineBasicBlock *MBB = FuncInfo.MBB; @@ -2017,8 +2170,16 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLI.getExceptionPointerRegister() == 0 && - TLI.getExceptionSelectorRegister() == 0) + const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn(); + if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 && + TLI.getExceptionSelectorRegister(PersonalityFn) == 0) + return; + + // If landingpad's return type is token type, we don't create DAG nodes + // for its exception pointer and selector value. The extraction of exception + // pointer or selector value from token type landingpads is not currently + // supported. + if (LP.getType()->isTokenTy()) return; SmallVector<EVT, 2> ValueVTs; @@ -2074,8 +2235,7 @@ void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { // If this case has the same successor and is a neighbour, merge it into // the previous cluster. 
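The merge step just below folds a case that targets the same successor as the previous cluster, and is its immediate neighbour, into that cluster, adding their probabilities. A standalone sketch of the same pass over already-sorted clusters, with a hypothetical flat Cluster struct in place of the real CaseCluster:

#include <vector>

struct Cluster { long Low, High; int Dest; double Prob; };  // hypothetical stand-in

// Merge adjacent ranges that branch to the same destination, accumulating
// their probabilities; mirrors the shape of sortAndRangeify's merge loop.
void mergeNeighbours(std::vector<Cluster> &Clusters) {
  size_t Dst = 0;
  for (size_t Src = 0; Src < Clusters.size(); ++Src) {
    const Cluster &CC = Clusters[Src];
    if (Dst != 0 && Clusters[Dst - 1].Dest == CC.Dest &&
        Clusters[Dst - 1].High + 1 == CC.Low) {
      Clusters[Dst - 1].High = CC.High;    // extend the previous range
      Clusters[Dst - 1].Prob += CC.Prob;   // and fold in its probability
    } else {
      Clusters[Dst++] = CC;
    }
  }
  Clusters.resize(Dst);
}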
Clusters[DstIndex - 1].High = CaseVal; - Clusters[DstIndex - 1].Weight += CC.Weight; - assert(Clusters[DstIndex - 1].Weight >= CC.Weight && "Weight overflow!"); + Clusters[DstIndex - 1].Prob += CC.Prob; } else { std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex], sizeof(Clusters[SrcIndex])); @@ -2109,8 +2269,9 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { continue; MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; - addSuccessorWithWeight(IndirectBrMBB, Succ); + addSuccessorWithProb(IndirectBrMBB, Succ); } + IndirectBrMBB->normalizeSuccProbs(); DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(), MVT::Other, getControlRoot(), @@ -2119,7 +2280,8 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { if (DAG.getTarget().Options.TrapUnreachable) - DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); + DAG.setRoot( + DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); } void SelectionDAGBuilder::visitFSub(const User &I) { @@ -2260,6 +2422,10 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); + + // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them. + // FIXME: We should propagate the fast-math-flags to the DAG node itself for + // further optimization, but currently FMF is only applicable to binary nodes. if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), @@ -2284,27 +2450,74 @@ void SelectionDAGBuilder::visitSelect(const User &I) { // Min/max matching is only viable if all output VTs are the same. if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) { - Value *LHS, *RHS; - SelectPatternFlavor SPF = matchSelectPattern(const_cast<User*>(&I), LHS, RHS); - ISD::NodeType Opc = ISD::DELETED_NODE; - switch (SPF) { - case SPF_UMAX: Opc = ISD::UMAX; break; - case SPF_UMIN: Opc = ISD::UMIN; break; - case SPF_SMAX: Opc = ISD::SMAX; break; - case SPF_SMIN: Opc = ISD::SMIN; break; - default: break; - } - EVT VT = ValueVTs[0]; LLVMContext &Ctx = *DAG.getContext(); auto &TLI = DAG.getTargetLoweringInfo(); - while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector) + + // We care about the legality of the operation after it has been type + // legalized. + while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal && + VT != TLI.getTypeToTransformTo(Ctx, VT)) VT = TLI.getTypeToTransformTo(Ctx, VT); - if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) && - // If the underlying comparison instruction is used by any other instruction, - // the consumed instructions won't be destroyed, so it is not profitable - // to convert to a min/max. + // If the vselect is legal, assume we want to leave this as a vector setcc + + // vselect. Otherwise, if this is going to be scalarized, we want to see if + // min/max is legal on the scalar type. 
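matchSelectPattern recognizes an explicit compare-plus-select pair and lets it collapse into a single min/max node; for the floating-point flavors handled just below, the reported NaN behavior picks between the NUM variant (return the non-NaN operand) and the NAN variant (propagate NaN). A standalone scalar sketch of those semantics using plain <cmath>, not the ISD nodes themselves:

#include <cmath>

// select(a > b, a, b) on integers is exactly a signed max (the SPF_SMAX case).
int smax(int a, int b) { return a > b ? a : b; }

// FMINNUM-style semantics (SPNB_RETURNS_OTHER): if exactly one operand is NaN,
// the other operand is returned.
double fminnum_like(double a, double b) {
  if (std::isnan(a)) return b;
  if (std::isnan(b)) return a;
  return a < b ? a : b;
}

// FMINNAN-style semantics (SPNB_RETURNS_NAN): any NaN operand makes the result NaN.
double fminnan_like(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) return NAN;
  return a < b ? a : b;
}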
+ bool UseScalarMinMax = VT.isVector() && + !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT); + + Value *LHS, *RHS; + auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS); + ISD::NodeType Opc = ISD::DELETED_NODE; + switch (SPR.Flavor) { + case SPF_UMAX: Opc = ISD::UMAX; break; + case SPF_UMIN: Opc = ISD::UMIN; break; + case SPF_SMAX: Opc = ISD::SMAX; break; + case SPF_SMIN: Opc = ISD::SMIN; break; + case SPF_FMINNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break; + case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; + case SPNB_RETURNS_ANY: { + if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT)) + Opc = ISD::FMINNUM; + else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT)) + Opc = ISD::FMINNAN; + else if (UseScalarMinMax) + Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ? + ISD::FMINNUM : ISD::FMINNAN; + break; + } + } + break; + case SPF_FMAXNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break; + case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; + case SPNB_RETURNS_ANY: + + if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT)) + Opc = ISD::FMAXNUM; + else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT)) + Opc = ISD::FMAXNAN; + else if (UseScalarMinMax) + Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ? + ISD::FMAXNUM : ISD::FMAXNAN; + break; + } + break; + default: break; + } + + if (Opc != ISD::DELETED_NODE && + (TLI.isOperationLegalOrCustom(Opc, VT) || + (UseScalarMinMax && + TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) && + // If the underlying comparison instruction is used by any other + // instruction, the consumed instructions won't be destroyed, so it is + // not profitable to convert to a min/max. cast<SelectInst>(&I)->getCondition()->hasOneUse()) { OpCode = Opc; LHSVal = getValue(LHS); @@ -2920,7 +3133,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // throughout the function's lifetime. bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr && - isDereferenceablePointer(SV, *DAG.getTarget().getDataLayout()); + isDereferenceablePointer(SV, DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; @@ -2940,8 +3153,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); - else if (AA->pointsToConstantMemory( - MemoryLocation(SV, AA->getTypeStoreSize(Ty), AAInfo))) { + else if (AA->pointsToConstantMemory(MemoryLocation( + SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -3056,7 +3269,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { SDLoc sdl = getCurSDLoc(); - // llvm.masked.store.*(Src0, Ptr, alignemt, Mask) + // llvm.masked.store.*(Src0, Ptr, alignment, Mask) Value *PtrOperand = I.getArgOperand(1); SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(I.getArgOperand(0)); @@ -3080,63 +3293,70 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { setValue(&I, StoreNode); } -// Gather/scatter receive a vector of pointers. 
-// This vector of pointers may be represented as a base pointer + vector of -// indices, it depends on GEP and instruction preceeding GEP -// that calculates indices -static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index, +// Get a uniform base for the Gather/Scatter intrinsic. +// The first argument of the Gather/Scatter intrinsic is a vector of pointers. +// We try to represent it as a base pointer + vector of indices. +// Usually, the vector of pointers comes from a 'getelementptr' instruction. +// The first operand of the GEP may be a single pointer or a vector of pointers +// Example: +// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind +// or +// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind +// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, .. +// +// When the first GEP operand is a single pointer - it is the uniform base we +// are looking for. If first operand of the GEP is a splat vector - we +// extract the spalt value and use it as a uniform base. +// In all other cases the function returns 'false'. +// +static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index, SelectionDAGBuilder* SDB) { - assert (Ptr->getType()->isVectorTy() && "Uexpected pointer type"); - GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); - if (!Gep || Gep->getNumOperands() > 2) + SelectionDAG& DAG = SDB->DAG; + LLVMContext &Context = *DAG.getContext(); + + assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type"); + const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); + if (!GEP || GEP->getNumOperands() > 2) return false; - ShuffleVectorInst *ShuffleInst = - dyn_cast<ShuffleVectorInst>(Gep->getPointerOperand()); - if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() || - cast<Instruction>(ShuffleInst->getOperand(0))->getOpcode() != - Instruction::InsertElement) + + const Value *GEPPtr = GEP->getPointerOperand(); + if (!GEPPtr->getType()->isVectorTy()) + Ptr = GEPPtr; + else if (!(Ptr = getSplatValue(GEPPtr))) return false; - Ptr = cast<InsertElementInst>(ShuffleInst->getOperand(0))->getOperand(1); + Value *IndexVal = GEP->getOperand(1); - SelectionDAG& DAG = SDB->DAG; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - // Check is the Ptr is inside current basic block - // If not, look for the shuffle instruction - if (SDB->findValue(Ptr)) - Base = SDB->getValue(Ptr); - else if (SDB->findValue(ShuffleInst)) { - SDValue ShuffleNode = SDB->getValue(ShuffleInst); - SDLoc sdl = ShuffleNode; - Base = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, sdl, - ShuffleNode.getValueType().getScalarType(), ShuffleNode, - DAG.getConstant(0, sdl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDB->setValue(Ptr, Base); - } - else + // The operands of the GEP may be defined in another basic block. + // In this case we'll not find nodes for the operands. + if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal)) return false; - Value *IndexVal = Gep->getOperand(1); - if (SDB->findValue(IndexVal)) { - Index = SDB->getValue(IndexVal); + Base = SDB->getValue(Ptr); + Index = SDB->getValue(IndexVal); - if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) { + // Suppress sign extension. 
+ if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) { + if (SDB->findValue(Sext->getOperand(0))) { IndexVal = Sext->getOperand(0); - if (SDB->findValue(IndexVal)) - Index = SDB->getValue(IndexVal); + Index = SDB->getValue(IndexVal); } - return true; } - return false; + if (!Index.getValueType().isVector()) { + unsigned GEPWidth = GEP->getType()->getVectorNumElements(); + EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); + SmallVector<SDValue, 16> Ops(GEPWidth, Index); + Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops); + } + return true; } void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDLoc sdl = getCurSDLoc(); // llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask) - Value *Ptr = I.getArgOperand(1); + const Value *Ptr = I.getArgOperand(1); SDValue Src0 = getValue(I.getArgOperand(0)); SDValue Mask = getValue(I.getArgOperand(3)); EVT VT = Src0.getValueType(); @@ -3150,10 +3370,10 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Base; SDValue Index; - Value *BasePtr = Ptr; + const Value *BasePtr = Ptr; bool UniformBase = getUniformBase(BasePtr, Base, Index, this); - Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; + const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), MachineMemOperand::MOStore, VT.getStoreSize(), @@ -3190,7 +3410,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { SDValue InChain = DAG.getRoot(); if (AA->pointsToConstantMemory(MemoryLocation( - PtrOperand, AA->getTypeStoreSize(I.getType()), AAInfo))) { + PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), + AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. InChain = DAG.getEntryNode(); } @@ -3212,7 +3433,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDLoc sdl = getCurSDLoc(); // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0) - Value *Ptr = I.getArgOperand(0); + const Value *Ptr = I.getArgOperand(0); SDValue Src0 = getValue(I.getArgOperand(3)); SDValue Mask = getValue(I.getArgOperand(2)); @@ -3229,12 +3450,13 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Root = DAG.getRoot(); SDValue Base; SDValue Index; - Value *BasePtr = Ptr; + const Value *BasePtr = Ptr; bool UniformBase = getUniformBase(BasePtr, Base, Index, this); bool ConstantMemory = false; if (UniformBase && - AA->pointsToConstantMemory( - MemoryLocation(BasePtr, AA->getTypeStoreSize(I.getType()), AAInfo))) { + AA->pointsToConstantMemory(MemoryLocation( + BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()), + AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -3511,6 +3733,8 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) { static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, SelectionDAG &DAG) { + // TODO: What fast-math-flags should be set on the floating-point nodes? + // IntegerPartOfX = ((int32_t)(t0); SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); @@ -3609,6 +3833,8 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // #define LOG2OFe 1.4426950f // t0 = Op * LOG2OFe + + // TODO: What fast-math-flags should be set here? 
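The hex constant used just below, 0x3fb8aa3b, is the IEEE-754 single-precision encoding of log2(e), about 1.4426950f (the LOG2OFe of the comment), so t0 = Op * log2(e) and exp(Op) is then recovered as 2^t0 by getLimitedPrecisionExp2. A small standalone check of both facts:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

int main() {
  // 0x3fb8aa3b reinterpreted as a float is ~1.4426950, i.e. log2(e).
  std::uint32_t bits = 0x3fb8aa3bu;
  float log2e;
  std::memcpy(&log2e, &bits, sizeof(float));
  assert(std::fabs(log2e - 1.4426950f) < 1e-6f);

  // exp(x) == 2^(x * log2(e)), which is what feeding t0 into the exp2
  // expansion relies on.
  float x = 0.75f;
  assert(std::fabs(std::exp(x) - std::exp2(x * log2e)) < 1e-5f);
  return 0;
}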
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, getF32Constant(DAG, 0x3fb8aa3b, dl)); return getLimitedPrecisionExp2(t0, dl, DAG); @@ -3622,6 +3848,9 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3718,6 +3947,9 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3813,6 +4045,9 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3922,6 +4157,7 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, } } + // TODO: What fast-math-flags should be set on the FMUL node? if (IsExp10) { // Put the exponent in the right bit position for later addition to the // final result: @@ -3955,9 +4191,9 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, return DAG.getConstantFP(1.0, DL, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->hasFnAttribute(Attribute::OptimizeForSize) || - // If optimizing for size, don't insert too many multiplies. This - // inserts up to 5 multiplies. + if (!F->optForSize() || + // If optimizing for size, don't insert too many multiplies. + // This inserts up to 5 multiplies. countPopulation(Val) + Log2_32(Val) < 7) { // We use the simple binary decomposition method to generate the multiply // sequence. There are more optimal ways to do this (for example, @@ -3965,6 +4201,8 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, // the benefit of being both really simple and much better than a libcall. SDValue Res; // Logically starts equal to 1.0 SDValue CurSquare = LHS; + // TODO: Intrinsics should have fast-math-flags that propagate to these + // nodes. while (Val) { if (Val & 1) { if (Res.getNode()) @@ -3990,22 +4228,20 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } -// getTruncatedArgReg - Find underlying register used for an truncated -// argument. -static unsigned getTruncatedArgReg(const SDValue &N) { - if (N.getOpcode() != ISD::TRUNCATE) +// getUnderlyingArgReg - Find underlying register used for a truncated or +// bitcasted argument. 
+static unsigned getUnderlyingArgReg(const SDValue &N) { + switch (N.getOpcode()) { + case ISD::CopyFromReg: + return cast<RegisterSDNode>(N.getOperand(1))->getReg(); + case ISD::BITCAST: + case ISD::AssertZext: + case ISD::AssertSext: + case ISD::TRUNCATE: + return getUnderlyingArgReg(N.getOperand(0)); + default: return 0; - - const SDValue &Ext = N.getOperand(0); - if (Ext.getOpcode() == ISD::AssertZext || - Ext.getOpcode() == ISD::AssertSext) { - const SDValue &CFR = Ext.getOperand(0); - if (CFR.getOpcode() == ISD::CopyFromReg) - return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); - if (CFR.getOpcode() == ISD::TRUNCATE) - return getTruncatedArgReg(CFR); } - return 0; } /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function @@ -4033,11 +4269,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( Op = MachineOperand::CreateFI(FI); if (!Op && N.getNode()) { - unsigned Reg; - if (N.getOpcode() == ISD::CopyFromReg) - Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); - else - Reg = getTruncatedArgReg(N); + unsigned Reg = getUnderlyingArgReg(N); if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); unsigned PR = RegInfo.getLiveInPhysReg(Reg); @@ -4145,14 +4377,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::longjmp: return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { - // FIXME: this definition of "user defined address space" is x86-specific - // Assert for address < 256 since we support only user defined address - // spaces. - assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && - cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() - < 256 && - "Unknown address space"); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -4169,12 +4393,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memset: { - // FIXME: this definition of "user defined address space" is x86-specific - // Assert for address < 256 since we support only user defined address - // spaces. - assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && - "Unknown address space"); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -4189,14 +4407,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memmove: { - // FIXME: this definition of "user defined address space" is x86-specific - // Assert for address < 256 since we support only user defined address - // spaces. - assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && - cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() - < 256 && - "Unknown address space"); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -4238,33 +4448,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); // Parameters are handled specially. 
- bool isParameter = Variable->getTag() == dwarf::DW_TAG_arg_variable || - isa<Argument>(Address); - - const AllocaInst *AI = dyn_cast<AllocaInst>(Address); - - if (isParameter && !AI) { - FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); - if (FINode) - // Byval parameter. We have a frame index at this point. - SDV = DAG.getFrameIndexDbgValue( - Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder); - else { - // Address is an argument, so try to emit its dbg value using - // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, - N); - return nullptr; - } - } else if (AI) + bool isParameter = Variable->isParameter() || isa<Argument>(Address); + auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); + if (isParameter && FINode) { + // Byval parameter. We have a frame index at this point. + SDV = DAG.getFrameIndexDbgValue(Variable, Expression, + FINode->getIndex(), 0, dl, SDNodeOrder); + } else if (isa<Argument>(Address)) { + // Address is an argument, so try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, + N); + return nullptr; + } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), true, 0, dl, SDNodeOrder); - else { - // Can't do anything with other non-AI cases yet. - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); - DEBUG(Address->dump()); - return nullptr; } DAG.AddDbgValue(SDV, N.getNode(), isParameter); } else { @@ -4315,12 +4513,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Check unused arguments map. N = UnusedArgNodeMap[V]; if (N.getNode()) { - // A dbg.value for an alloca is always indirect. - bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, - IsIndirect, N)) { + false, N)) { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), - IsIndirect, Offset, dl, SDNodeOrder); + false, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } } else if (!V->use_empty() ) { @@ -4421,6 +4617,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getRoot(), getValue(I.getArgOperand(0)))); return nullptr; } + case Intrinsic::eh_sjlj_setup_dispatch: { + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, + getRoot())); + return nullptr; + } case Intrinsic::masked_gather: visitMaskedGather(I); @@ -4614,6 +4815,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); } else { + // TODO: Intrinsic calls should have fast-math-flags. 
SDValue Mul = DAG.getNode(ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), @@ -4652,6 +4854,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(Res.getValue(1)); return nullptr; } + case Intrinsic::bitreverse: + setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return nullptr; case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -4693,6 +4900,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); return nullptr; } + case Intrinsic::get_dynamic_area_offset: { + SDValue Op = getRoot(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); + // Result type for @llvm.get.dynamic.area.offset should match PtrTy for + // target. + if (PtrTy != ResTy) + report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset" + " intrinsic!"); + Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy), + Op); + DAG.setRoot(Op); + setValue(&I, Res); + return nullptr; + } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); @@ -4743,8 +4965,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. - Res = DAG.getStore(Chain, sdl, Src, FIN, - MachinePointerInfo::getFixedStack(FI), + Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI), true, false, 0); setValue(&I, Res); DAG.setRoot(Res); @@ -4946,9 +5168,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::clear_cache: return TLI.getClearCacheBuiltinName(); - case Intrinsic::eh_actions: - setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); - return nullptr; case Intrinsic::donothing: // ignore return nullptr; @@ -4965,9 +5184,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { visitStatepoint(I); return nullptr; } - case Intrinsic::experimental_gc_result_int: - case Intrinsic::experimental_gc_result_float: - case Intrinsic::experimental_gc_result_ptr: case Intrinsic::experimental_gc_result: { visitGCResult(I); return nullptr; @@ -4978,7 +5194,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); - + case Intrinsic::instrprof_value_profile: + llvm_unreachable("instrprof failed to lower a value profiling call"); case Intrinsic::localescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); @@ -5032,19 +5249,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } - case Intrinsic::eh_begincatch: - case Intrinsic::eh_endcatch: - llvm_unreachable("begin/end catch intrinsics not lowered in codegen"); + + case Intrinsic::eh_exceptionpointer: case Intrinsic::eh_exceptioncode: { - unsigned Reg = TLI.getExceptionPointerRegister(); - assert(Reg && "cannot get exception code on this platform"); + // Get the exception pointer vreg, copy from it, and 
resize it to fit. + const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0)); MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); - assert(FuncInfo.MBB->isLandingPad() && "eh.exceptioncode in non-lpad"); - unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC); + unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC); SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); - N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); + if (Intrinsic == Intrinsic::eh_exceptioncode) + N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); setValue(&I, N); return nullptr; } @@ -5053,11 +5269,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); MCSymbol *BeginLabel = nullptr; - if (LandingPad) { + if (EHPadBB) { // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. BeginLabel = MMI.getContext().createTempSymbol(); @@ -5067,7 +5283,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, unsigned CallSiteIndex = MMI.getCurrentCallSite(); if (CallSiteIndex) { MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); - LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex); + LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); // Now that the call site is handled, stop tracking it. MMI.setCurrentCallSite(0); @@ -5100,14 +5316,21 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, DAG.setRoot(Result.second); } - if (LandingPad) { + if (EHPadBB) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. 
- MMI.addInvoke(LandingPad, BeginLabel, EndLabel); + if (MMI.hasEHFunclets()) { + assert(CLI.CS); + WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); + EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()), + BeginLabel, EndLabel); + } else { + MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); + } } return Result; @@ -5115,7 +5338,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); FunctionType *FTy = cast<FunctionType>(PT->getElementType()); Type *RetTy = FTy->getReturnType(); @@ -5154,7 +5377,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) .setCallee(RetTy, FTy, Callee, std::move(Args), CS) .setTailCall(isTailCall); - std::pair<SDValue,SDValue> Result = lowerInvokable(CLI, LandingPad); + std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); @@ -5978,7 +6201,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); std::pair<unsigned, const TargetRegisterClass *> MatchRC = TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, OpInfo.ConstraintVT); @@ -6037,10 +6260,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout())); - Chain = DAG.getStore(Chain, getCurSDLoc(), - OpInfo.CallOperand, StackSlot, - MachinePointerInfo::getFixedStack(SSFI), - false, false, 0); + Chain = DAG.getStore( + Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), + false, false, 0); OpInfo.CallOperand = StackSlot; } @@ -6460,12 +6683,9 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// This is a helper for lowering intrinsics that follow a target calling /// convention or require stack pointer adjustment. Only a subset of the /// intrinsic's operands need to participate in the calling convention. 
-std::pair<SDValue, SDValue> -SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, - unsigned NumArgs, SDValue Callee, - Type *ReturnTy, - MachineBasicBlock *LandingPad, - bool IsPatchPoint) { +std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands( + ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, + Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); @@ -6489,7 +6709,7 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs) .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); - return lowerInvokable(CLI, LandingPad); + return lowerInvokable(CLI, EHPadBB); } /// \brief Add a stack map intrinsic call's live variable operands to a stackmap @@ -6593,7 +6813,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, // i32 <numBytes>, // i8* <target>, @@ -6630,9 +6850,8 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; Type *ReturnTy = IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); - std::pair<SDValue, SDValue> Result = - lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, - LandingPad, true); + std::pair<SDValue, SDValue> Result = lowerCallOperands( + CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true); SDNode *CallEnd = Result.second.getNode(); if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) @@ -6926,8 +7145,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { i, j*Parts[j].getValueType().getStoreSize()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); - else if (j != 0) + else if (j != 0) { MyFlags.Flags.setOrigAlign(1); + if (j == NumParts - 1) + MyFlags.Flags.setSplitEnd(); + } CLI.Outs.push_back(MyFlags); CLI.OutVals.push_back(Parts[j]); @@ -6986,8 +7208,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { PtrVT)); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, - MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, - false, false, 1); + MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), + DemoteStackIdx, Offsets[i]), + false, false, false, 1); ReturnValues[i] = L; Chains[i] = L.getValue(1); } @@ -7069,9 +7292,9 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { if (FastISel) return A->use_empty(); - const BasicBlock *Entry = A->getParent()->begin(); + const BasicBlock &Entry = A->getParent()->front(); for (const User *U : A->users()) - if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U)) + if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U)) return false; // Use not in entry block. return true; @@ -7138,6 +7361,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // in the various CC lowering callbacks. Flags.setByVal(); } + if (F.getCallingConv() == CallingConv::X86_INTR) { + // IA Interrupt passes frame (1st parameter) by value in the stack. 
+ if (Idx == 1) + Flags.setByVal(); + } if (Flags.isByVal() || Flags.isInAlloca()) { PointerType *Ty = cast<PointerType>(I->getType()); Type *ElementTy = Ty->getElementType(); @@ -7165,8 +7393,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 - else if (i > 0) + else if (i > 0) { MyFlags.Flags.setOrigAlign(1); + if (i == NumRegs - 1) + MyFlags.Flags.setSplitEnd(); + } Ins.push_back(MyFlags); } if (NeedsRegBlock && Value == NumValues - 1) @@ -7235,12 +7466,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // If this argument is unused then remember its value. It is used to generate // debugging information. if (I->use_empty() && NumValues) { - SDB->setUnusedArgValue(I, InVals[i]); + SDB->setUnusedArgValue(&*I, InVals[i]); // Also remember any frame index for use in FastISel. if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(InVals[i].getNode())) - FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); } for (unsigned Val = 0; Val != NumValues; ++Val) { @@ -7270,18 +7501,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Note down frame index. if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) - FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues), SDB->getCurSDLoc()); - SDB->setValue(I, Res); + SDB->setValue(&*I, Res); if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) - FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); } // If this argument is live outside of the entry block, insert a copy from @@ -7293,13 +7524,13 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // uses with vregs. unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { - FuncInfo->ValueMap[I] = Reg; + FuncInfo->ValueMap[&*I] = Reg; continue; } } - if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) { - FuncInfo->InitializeRegForValue(I); - SDB->CopyToExportRegsIfNeeded(I); + if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) { + FuncInfo->InitializeRegForValue(&*I); + SDB->CopyToExportRegsIfNeeded(&*I); } } @@ -7401,21 +7632,21 @@ AddSuccessorMBB(const BasicBlock *BB, // If SuccBB has not been created yet, create it. if (!SuccMBB) { MachineFunction *MF = ParentMBB->getParent(); - MachineFunction::iterator BBI = ParentMBB; + MachineFunction::iterator BBI(ParentMBB); SuccMBB = MF->CreateMachineBasicBlock(BB); MF->insert(++BBI, SuccMBB); } // Add it as a successor of ParentMBB. ParentMBB->addSuccessor( - SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely)); + SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely)); return SuccMBB; } MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { - MachineFunction::iterator I = MBB; + MachineFunction::iterator I(MBB); if (++I == FuncInfo.MF->end()) return nullptr; - return I; + return &*I; } /// During lowering new call nodes can be created (such as memset, etc.). 
@@ -7469,14 +7700,18 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, CaseCluster &JTCluster) { assert(First <= Last); - uint32_t Weight = 0; + auto Prob = BranchProbability::getZero(); unsigned NumCmps = 0; std::vector<MachineBasicBlock*> Table; - DenseMap<MachineBasicBlock*, uint32_t> JTWeights; + DenseMap<MachineBasicBlock*, BranchProbability> JTProbs; + + // Initialize probabilities in JTProbs. + for (unsigned I = First; I <= Last; ++I) + JTProbs[Clusters[I].MBB] = BranchProbability::getZero(); + for (unsigned I = First; I <= Last; ++I) { assert(Clusters[I].Kind == CC_Range); - Weight += Clusters[I].Weight; - assert(Weight >= Clusters[I].Weight && "Weight overflow!"); + Prob += Clusters[I].Prob; APInt Low = Clusters[I].Low->getValue(); APInt High = Clusters[I].High->getValue(); NumCmps += (Low == High) ? 1 : 2; @@ -7491,10 +7726,10 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, uint64_t ClusterSize = (High - Low).getLimitedValue() + 1; for (uint64_t J = 0; J < ClusterSize; ++J) Table.push_back(Clusters[I].MBB); - JTWeights[Clusters[I].MBB] += Clusters[I].Weight; + JTProbs[Clusters[I].MBB] += Clusters[I].Prob; } - unsigned NumDests = JTWeights.size(); + unsigned NumDests = JTProbs.size(); if (isSuitableForBitTests(NumDests, NumCmps, Clusters[First].Low->getValue(), Clusters[Last].High->getValue())) { @@ -7513,9 +7748,10 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, for (MachineBasicBlock *Succ : Table) { if (Done.count(Succ)) continue; - addSuccessorWithWeight(JumpTableMBB, Succ, JTWeights[Succ]); + addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]); Done.insert(Succ); } + JumpTableMBB->normalizeSuccProbs(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) @@ -7529,7 +7765,7 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, JTCases.emplace_back(std::move(JTH), std::move(JT)); JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, - JTCases.size() - 1, Weight); + JTCases.size() - 1, Prob); return true; } @@ -7707,19 +7943,29 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, .getSizeInBits(); assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); - if (Low.isNonNegative() && High.slt(BitWidth)) { - // Optimize the case where all the case values fit in a - // word without having to subtract minValue. In this case, - // we can optimize away the subtraction. + // Check if the clusters cover a contiguous range such that no value in the + // range will jump to the default statement. + bool ContiguousRange = true; + for (int64_t I = First + 1; I <= Last; ++I) { + if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) { + ContiguousRange = false; + break; + } + } + + if (Low.isStrictlyPositive() && High.slt(BitWidth)) { + // Optimize the case where all the case values fit in a word without having + // to subtract minValue. In this case, we can optimize away the subtraction. LowBound = APInt::getNullValue(Low.getBitWidth()); CmpRange = High; + ContiguousRange = false; } else { LowBound = Low; CmpRange = High - Low; } CaseBitsVector CBV; - uint32_t TotalWeight = 0; + auto TotalProb = BranchProbability::getZero(); for (unsigned i = First; i <= Last; ++i) { // Find the CaseBits for this destination. 
unsigned j; @@ -7727,39 +7973,40 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, if (CBV[j].BB == Clusters[i].MBB) break; if (j == CBV.size()) - CBV.push_back(CaseBits(0, Clusters[i].MBB, 0, 0)); + CBV.push_back( + CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero())); CaseBits *CB = &CBV[j]; - // Update Mask, Bits and ExtraWeight. + // Update Mask, Bits and ExtraProb. uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue(); uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue(); assert(Hi >= Lo && Hi < 64 && "Invalid bit case!"); CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo; CB->Bits += Hi - Lo + 1; - CB->ExtraWeight += Clusters[i].Weight; - TotalWeight += Clusters[i].Weight; - assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!"); + CB->ExtraProb += Clusters[i].Prob; + TotalProb += Clusters[i].Prob; } BitTestInfo BTI; std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { - // Sort by weight first, number of bits second. - if (a.ExtraWeight != b.ExtraWeight) - return a.ExtraWeight > b.ExtraWeight; + // Sort by probability first, number of bits second. + if (a.ExtraProb != b.ExtraProb) + return a.ExtraProb > b.ExtraProb; return a.Bits > b.Bits; }); for (auto &CB : CBV) { MachineBasicBlock *BitTestBB = FuncInfo.MF->CreateMachineBasicBlock(SI->getParent()); - BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight)); + BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb)); } BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), - SI->getCondition(), -1U, MVT::Other, false, nullptr, - nullptr, std::move(BTI)); + SI->getCondition(), -1U, MVT::Other, false, + ContiguousRange, nullptr, nullptr, std::move(BTI), + TotalProb); BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, - BitTestCases.size() - 1, TotalWeight); + BitTestCases.size() - 1, TotalProb); return true; } @@ -7868,9 +8115,9 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, MachineBasicBlock *DefaultMBB) { MachineFunction *CurMF = FuncInfo.MF; MachineBasicBlock *NextMBB = nullptr; - MachineFunction::iterator BBI = W.MBB; + MachineFunction::iterator BBI(W.MBB); if (++BBI != FuncInfo.MF->end()) - NextMBB = BBI; + NextMBB = &*BBI; unsigned Size = W.LastCluster - W.FirstCluster + 1; @@ -7906,13 +8153,16 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, ISD::SETEQ); // Update successor info. - // Both Small and Big will jump to Small.BB, so we sum up the weights. - addSuccessorWithWeight(SwitchMBB, Small.MBB, Small.Weight + Big.Weight); - addSuccessorWithWeight( - SwitchMBB, DefaultMBB, - // The default destination is the first successor in IR. - BPI ? BPI->getEdgeWeight(SwitchMBB->getBasicBlock(), (unsigned)0) - : 0); + // Both Small and Big will jump to Small.BB, so we sum up the + // probabilities. + addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob); + if (BPI) + addSuccessorWithProb( + SwitchMBB, DefaultMBB, + // The default destination is the first successor in IR. + BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0)); + else + addSuccessorWithProb(SwitchMBB, DefaultMBB); // Insert the true branch. SDValue BrCond = @@ -7929,17 +8179,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } if (TM.getOptLevel() != CodeGenOpt::None) { - // Order cases by weight so the most likely case will be checked first. 
+ // Order cases by probability so the most likely case will be checked first. std::sort(W.FirstCluster, W.LastCluster + 1, [](const CaseCluster &a, const CaseCluster &b) { - return a.Weight > b.Weight; + return a.Prob > b.Prob; }); // Rearrange the case blocks so that the last one falls through if possible - // without without changing the order of weights. + // without without changing the order of probabilities. for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) { --I; - if (I->Weight > W.LastCluster->Weight) + if (I->Prob > W.LastCluster->Prob) break; if (I->Kind == CC_Range && I->MBB == NextMBB) { std::swap(*I, *W.LastCluster); @@ -7948,12 +8198,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } } - // Compute total weight. - uint32_t UnhandledWeights = 0; - for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) { - UnhandledWeights += I->Weight; - assert(UnhandledWeights >= I->Weight && "Weight overflow!"); - } + // Compute total probability. + BranchProbability DefaultProb = W.DefaultProb; + BranchProbability UnhandledProbs = DefaultProb; + for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) + UnhandledProbs += I->Prob; MachineBasicBlock *CurMBB = W.MBB; for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) { @@ -7967,6 +8216,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } + UnhandledProbs -= I->Prob; switch (I->Kind) { case CC_JumpTable: { @@ -7977,8 +8227,28 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, // The jump block hasn't been inserted yet; insert it here. MachineBasicBlock *JumpMBB = JT->MBB; CurMF->insert(BBI, JumpMBB); - addSuccessorWithWeight(CurMBB, Fallthrough); - addSuccessorWithWeight(CurMBB, JumpMBB); + + auto JumpProb = I->Prob; + auto FallthroughProb = UnhandledProbs; + + // If the default statement is a target of the jump table, we evenly + // distribute the default probability to successors of CurMBB. Also + // update the probability on the edge from JumpMBB to Fallthrough. + for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(), + SE = JumpMBB->succ_end(); + SI != SE; ++SI) { + if (*SI == DefaultMBB) { + JumpProb += DefaultProb / 2; + FallthroughProb -= DefaultProb / 2; + JumpMBB->setSuccProbability(SI, DefaultProb / 2); + JumpMBB->normalizeSuccProbs(); + break; + } + } + + addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); + addSuccessorWithProb(CurMBB, JumpMBB, JumpProb); + CurMBB->normalizeSuccProbs(); // The jump table header will be inserted in our current block, do the // range check, and fall through to our fallthrough block. @@ -8004,8 +8274,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, BTB->Parent = CurMBB; BTB->Default = Fallthrough; - // If we're in the right place, emit the bit test header header right now. - if (CurMBB ==SwitchMBB) { + BTB->DefaultProb = UnhandledProbs; + // If the cases in bit test don't form a contiguous range, we evenly + // distribute the probability on the edge to Fallthrough to two + // successors of CurMBB. + if (!BTB->ContiguousRange) { + BTB->Prob += DefaultProb / 2; + BTB->DefaultProb -= DefaultProb / 2; + } + + // If we're in the right place, emit the bit test header right now. 
+ if (CurMBB == SwitchMBB) { visitBitTestHeader(*BTB, SwitchMBB); BTB->Emitted = true; } @@ -8028,10 +8307,9 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, RHS = I->High; } - // The false weight is the sum of all unhandled cases. - UnhandledWeights -= I->Weight; - CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight, - UnhandledWeights); + // The false probability is the sum of all unhandled cases. + CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob, + UnhandledProbs); if (CurMBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); @@ -8049,8 +8327,8 @@ unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC, CaseClusterIt First, CaseClusterIt Last) { return std::count_if(First, Last + 1, [&](const CaseCluster &X) { - if (X.Weight != CC.Weight) - return X.Weight > CC.Weight; + if (X.Prob != CC.Prob) + return X.Prob > CC.Prob; // Ties are broken by comparing the case value. return X.Low->getValue().slt(CC.Low->getValue()); @@ -8066,24 +8344,24 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); - // Balance the tree based on branch weights to create a near-optimal (in terms - // of search time given key frequency) binary search tree. See e.g. Kurt + // Balance the tree based on branch probabilities to create a near-optimal (in + // terms of search time given key frequency) binary search tree. See e.g. Kurt // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). CaseClusterIt LastLeft = W.FirstCluster; CaseClusterIt FirstRight = W.LastCluster; - uint32_t LeftWeight = LastLeft->Weight; - uint32_t RightWeight = FirstRight->Weight; + auto LeftProb = LastLeft->Prob + W.DefaultProb / 2; + auto RightProb = FirstRight->Prob + W.DefaultProb / 2; // Move LastLeft and FirstRight towards each other from opposite directions to - // find a partitioning of the clusters which balances the weight on both - // sides. If LeftWeight and RightWeight are equal, alternate which side is - // taken to ensure 0-weight nodes are distributed evenly. + // find a partitioning of the clusters which balances the probability on both + // sides. If LeftProb and RightProb are equal, alternate which side is + // taken to ensure 0-probability nodes are distributed evenly. unsigned I = 0; while (LastLeft + 1 < FirstRight) { - if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1))) - LeftWeight += (++LastLeft)->Weight; + if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1))) + LeftProb += (++LastLeft)->Prob; else - RightWeight += (--FirstRight)->Weight; + RightProb += (--FirstRight)->Prob; I++; } @@ -8144,7 +8422,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, const ConstantInt *Pivot = PivotCluster->Low; // New blocks will be inserted immediately after the current one. - MachineFunction::iterator BBI = W.MBB; + MachineFunction::iterator BBI(W.MBB); ++BBI; // We will branch to the LHS if Value < Pivot. If LHS is a single cluster, @@ -8158,7 +8436,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, } else { LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, LeftMBB); - WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot}); + WorkList.push_back( + {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. 
ExportFromCurrentBlock(Cond); } @@ -8173,14 +8452,15 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, } else { RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, RightMBB); - WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT}); + WorkList.push_back( + {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } // Create the CaseBlock record that will be used to lower the branch. CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, - LeftWeight, RightWeight); + LeftProb, RightProb); if (W.MBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); @@ -8196,9 +8476,10 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { for (auto I : SI.cases()) { MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; const ConstantInt *CaseVal = I.getCaseValue(); - uint32_t Weight = - BPI ? BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()) : 0; - Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight)); + BranchProbability Prob = + BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex()) + : BranchProbability(1, SI.getNumCases() + 1); + Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob)); } MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; @@ -8274,7 +8555,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { SwitchWorkList WorkList; CaseClusterIt First = Clusters.begin(); CaseClusterIt Last = Clusters.end() - 1; - WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr}); + auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB); + WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); while (!WorkList.empty()) { SwitchWorkListItem W = WorkList.back(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 700675453fe7..49a3872d20c8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -17,6 +17,7 @@ #include "StatepointLowering.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -30,7 +31,6 @@ namespace llvm { class AddrSpaceCastInst; -class AliasAnalysis; class AllocaInst; class BasicBlock; class BitCastInst; @@ -154,39 +154,39 @@ private: unsigned JTCasesIndex; unsigned BTCasesIndex; }; - uint32_t Weight; + BranchProbability Prob; static CaseCluster range(const ConstantInt *Low, const ConstantInt *High, - MachineBasicBlock *MBB, uint32_t Weight) { + MachineBasicBlock *MBB, BranchProbability Prob) { CaseCluster C; C.Kind = CC_Range; C.Low = Low; C.High = High; C.MBB = MBB; - C.Weight = Weight; + C.Prob = Prob; return C; } static CaseCluster jumpTable(const ConstantInt *Low, const ConstantInt *High, unsigned JTCasesIndex, - uint32_t Weight) { + BranchProbability Prob) { CaseCluster C; C.Kind = CC_JumpTable; C.Low = Low; C.High = High; C.JTCasesIndex = JTCasesIndex; - C.Weight = Weight; + C.Prob = Prob; return C; } static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High, - unsigned BTCasesIndex, uint32_t Weight) { + unsigned BTCasesIndex, BranchProbability Prob) { CaseCluster C; C.Kind = CC_BitTests; C.Low 
= Low; C.High = High; C.BTCasesIndex = BTCasesIndex; - C.Weight = Weight; + C.Prob = Prob; return C; } }; @@ -198,13 +198,13 @@ private: uint64_t Mask; MachineBasicBlock* BB; unsigned Bits; - uint32_t ExtraWeight; + BranchProbability ExtraProb; CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits, - uint32_t Weight): - Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { } + BranchProbability Prob): + Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) { } - CaseBits() : Mask(0), BB(nullptr), Bits(0), ExtraWeight(0) {} + CaseBits() : Mask(0), BB(nullptr), Bits(0) {} }; typedef std::vector<CaseBits> CaseBitsVector; @@ -217,13 +217,13 @@ private: /// blocks needed by multi-case switch statements. struct CaseBlock { CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs, - const Value *cmpmiddle, - MachineBasicBlock *truebb, MachineBasicBlock *falsebb, - MachineBasicBlock *me, - uint32_t trueweight = 0, uint32_t falseweight = 0) - : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), - TrueBB(truebb), FalseBB(falsebb), ThisBB(me), - TrueWeight(trueweight), FalseWeight(falseweight) { } + const Value *cmpmiddle, MachineBasicBlock *truebb, + MachineBasicBlock *falsebb, MachineBasicBlock *me, + BranchProbability trueprob = BranchProbability::getUnknown(), + BranchProbability falseprob = BranchProbability::getUnknown()) + : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), + TrueBB(truebb), FalseBB(falsebb), ThisBB(me), TrueProb(trueprob), + FalseProb(falseprob) {} // CC - the condition code to use for the case block's setcc node ISD::CondCode CC; @@ -239,8 +239,8 @@ private: // ThisBB - the block into which to emit the code for the setcc and branches MachineBasicBlock *ThisBB; - // TrueWeight/FalseWeight - branch weights. - uint32_t TrueWeight, FalseWeight; + // TrueProb/FalseProb - branch weights. + BranchProbability TrueProb, FalseProb; }; struct JumpTable { @@ -272,32 +272,35 @@ private: struct BitTestCase { BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr, - uint32_t Weight): - Mask(M), ThisBB(T), TargetBB(Tr), ExtraWeight(Weight) { } + BranchProbability Prob): + Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) { } uint64_t Mask; MachineBasicBlock *ThisBB; MachineBasicBlock *TargetBB; - uint32_t ExtraWeight; + BranchProbability ExtraProb; }; typedef SmallVector<BitTestCase, 3> BitTestInfo; struct BitTestBlock { - BitTestBlock(APInt F, APInt R, const Value* SV, - unsigned Rg, MVT RgVT, bool E, - MachineBasicBlock* P, MachineBasicBlock* D, - BitTestInfo C): - First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), - Parent(P), Default(D), Cases(std::move(C)) { } + BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, + bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D, + BitTestInfo C, BranchProbability Pr) + : First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), + ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)), + Prob(Pr) {} APInt First; APInt Range; const Value *SValue; unsigned Reg; MVT RegVT; bool Emitted; + bool ContiguousRange; MachineBasicBlock *Parent; MachineBasicBlock *Default; BitTestInfo Cases; + BranchProbability Prob; + BranchProbability DefaultProb; }; /// Minimum jump table density, in percent. 
@@ -339,6 +342,7 @@ private: CaseClusterIt LastCluster; const ConstantInt *GE; const ConstantInt *LT; + BranchProbability DefaultProb; }; typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList; @@ -515,6 +519,7 @@ private: void resetPerFunctionState() { FailureMBB = nullptr; Guard = nullptr; + GuardReg = 0; } MachineBasicBlock *getParentMBB() { return ParentMBB; } @@ -592,10 +597,6 @@ public: /// FunctionLoweringInfo &FuncInfo; - /// OptLevel - What optimization level we're generating code for. - /// - CodeGenOpt::Level OptLevel; - /// GFI - Garbage collection metadata for the function. GCFunctionInfo *GFI; @@ -613,7 +614,7 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), - DAG(dag), FuncInfo(funcinfo), OptLevel(ol), + DAG(dag), FuncInfo(funcinfo), HasTailCall(false) { } @@ -692,19 +693,20 @@ public: void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB, unsigned Opc, - uint32_t TW, uint32_t FW); + MachineBasicBlock *SwitchBB, + Instruction::BinaryOps Opc, BranchProbability TW, + BranchProbability FW); void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - uint32_t TW, uint32_t FW); + BranchProbability TW, BranchProbability FW); bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases); bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); void CopyToExportRegsIfNeeded(const Value *V); void ExportFromCurrentBlock(const Value *V); void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, - MachineBasicBlock *LandingPad = nullptr); + const BasicBlock *EHPadBB = nullptr); std::pair<SDValue, SDValue> lowerCallOperands( ImmutableCallSite CS, @@ -712,7 +714,7 @@ public: unsigned NumArgs, SDValue Callee, Type *ReturnTy, - MachineBasicBlock *LandingPad = nullptr, + const BasicBlock *EHPadBB = nullptr, bool IsPatchPoint = false); /// UpdateSplitBlock - When an MBB was split during scheduling, update the @@ -722,11 +724,11 @@ public: // This function is responsible for the whole statepoint lowering process. // It uniformly handles invoke and call statepoints. void LowerStatepoint(ImmutableStatepoint Statepoint, - MachineBasicBlock *LandingPad = nullptr); + const BasicBlock *EHPadBB = nullptr); private: - std::pair<SDValue, SDValue> lowerInvokable( - TargetLowering::CallLoweringInfo &CLI, - MachineBasicBlock *LandingPad); + std::pair<SDValue, SDValue> + lowerInvokable(TargetLowering::CallLoweringInfo &CLI, + const BasicBlock *EHPadBB = nullptr); // Terminator instructions. 
void visitRet(const ReturnInst &I); @@ -734,11 +736,18 @@ private: void visitSwitch(const SwitchInst &I); void visitIndirectBr(const IndirectBrInst &I); void visitUnreachable(const UnreachableInst &I); + void visitCleanupRet(const CleanupReturnInst &I); + void visitCatchSwitch(const CatchSwitchInst &I); + void visitCatchRet(const CatchReturnInst &I); + void visitCatchPad(const CatchPadInst &I); + void visitCleanupPad(const CleanupPadInst &CPI); + + BranchProbability getEdgeProbability(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const; + void addSuccessorWithProb( + MachineBasicBlock *Src, MachineBasicBlock *Dst, + BranchProbability Prob = BranchProbability::getUnknown()); - uint32_t getEdgeWeight(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const; - void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, - uint32_t Weight = 0); public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); @@ -748,7 +757,7 @@ public: void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, - uint32_t BranchWeightToNext, + BranchProbability BranchProbToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB); @@ -842,7 +851,7 @@ private: void visitVACopy(const CallInst &I); void visitStackmap(const CallInst &I); void visitPatchpoint(ImmutableCallSite CS, - MachineBasicBlock *LandingPad = nullptr); + const BasicBlock *EHPadBB = nullptr); // These three are implemented in StatepointLowering.cpp void visitStatepoint(const CallInst &I); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 5b9b18286fae..a1c6c4c1dd63 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" +#include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -30,6 +31,11 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +static cl::opt<bool> +VerboseDAGDumping("dag-dump-verbose", cl::Hidden, + cl::desc("Display more information when dumping selection " + "DAG nodes.")); + std::string SDNode::getOperationName(const SelectionDAG *G) const { switch (getOpcode()) { default: @@ -102,6 +108,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; + case ISD::EH_SJLJ_SETUP_DISPATCH: return "EH_SJLJ_SETUP_DISPATCH"; case ISD::ConstantPool: return "ConstantPool"; case ISD::TargetIndex: return "TargetIndex"; case ISD::ExternalSymbol: return "ExternalSymbol"; @@ -145,6 +152,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FABS: return "fabs"; case ISD::FMINNUM: return "fminnum"; case ISD::FMAXNUM: return "fmaxnum"; + case ISD::FMINNAN: return "fminnan"; + case ISD::FMAXNAN: return "fmaxnan"; case ISD::FNEG: return "fneg"; case ISD::FSQRT: return "fsqrt"; case ISD::FSIN: return "fsin"; @@ -201,6 +210,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; + case ISD::SETCCE: return "setcce"; case ISD::SELECT: return 
"select"; case ISD::VSELECT: return "vselect"; case ISD::SELECT_CC: return "select_cc"; @@ -273,6 +283,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CALLSEQ_START: return "callseq_start"; case ISD::CALLSEQ_END: return "callseq_end"; + // EH instructions + case ISD::CATCHRET: return "catchret"; + case ISD::CLEANUPRET: return "cleanupret"; + // Other operators case ISD::LOAD: return "load"; case ISD::STORE: return "store"; @@ -295,15 +309,17 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::LIFETIME_END: return "lifetime.end"; case ISD::GC_TRANSITION_START: return "gc_transition.start"; case ISD::GC_TRANSITION_END: return "gc_transition.end"; + case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset"; // Bit manipulation + case ISD::BITREVERSE: return "bitreverse"; case ISD::BSWAP: return "bswap"; case ISD::CTPOP: return "ctpop"; case ISD::CTTZ: return "cttz"; case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; case ISD::CTLZ: return "ctlz"; case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; - + // Trampolines case ISD::INIT_TRAMPOLINE: return "init_trampoline"; case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; @@ -320,7 +336,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SETO: return "seto"; case ISD::SETUO: return "setuo"; - case ISD::SETUEQ: return "setue"; + case ISD::SETUEQ: return "setueq"; case ISD::SETUGT: return "setugt"; case ISD::SETUGE: return "setuge"; case ISD::SETULT: return "setult"; @@ -352,6 +368,16 @@ const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { } } +static Printable PrintNodeId(const SDNode &Node) { + return Printable([&Node](raw_ostream &OS) { +#ifndef NDEBUG + OS << 't' << Node.PersistentId; +#else + OS << (const void*)&Node; +#endif + }); +} + void SDNode::dump() const { dump(nullptr); } void SDNode::dump(const SelectionDAG *G) const { print(dbgs(), G); @@ -359,8 +385,6 @@ void SDNode::dump(const SelectionDAG *G) const { } void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { - OS << (const void*)this << ": "; - for (unsigned i = 0, e = getNumValues(); i != e; ++i) { if (i) OS << ","; if (getValueType(i) == MVT::Other) @@ -368,7 +392,6 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { else OS << getValueType(i).getEVTString(); } - OS << " = " << getOperationName(G); } void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { @@ -523,48 +546,58 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { << ']'; } - if (unsigned Order = getIROrder()) - OS << " [ORD=" << Order << ']'; + if (VerboseDAGDumping) { + if (unsigned Order = getIROrder()) + OS << " [ORD=" << Order << ']'; - if (getNodeId() != -1) - OS << " [ID=" << getNodeId() << ']'; + if (getNodeId() != -1) + OS << " [ID=" << getNodeId() << ']'; - if (!G) - return; + if (!G) + return; - DILocation *L = getDebugLoc(); - if (!L) - return; + DILocation *L = getDebugLoc(); + if (!L) + return; + + if (auto *Scope = L->getScope()) + OS << Scope->getFilename(); + else + OS << "<unknown>"; + OS << ':' << L->getLine(); + if (unsigned C = L->getColumn()) + OS << ':' << C; + } +} - if (auto *Scope = L->getScope()) - OS << Scope->getFilename(); - else - OS << "<unknown>"; - OS << ':' << L->getLine(); - if (unsigned C = L->getColumn()) - OS << ':' << C; +/// Return true if this node is so simple that we should just print it inline +/// if it appears as an operand. 
+static bool shouldPrintInline(const SDNode &Node) { + if (Node.getOpcode() == ISD::EntryToken) + return false; + return Node.getNumOperands() == 0; } static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { - for (const SDValue &Op : N->op_values()) + for (const SDValue &Op : N->op_values()) { + if (shouldPrintInline(*Op.getNode())) + continue; if (Op.getNode()->hasOneUse()) DumpNodes(Op.getNode(), indent+2, G); - else - dbgs() << "\n" << std::string(indent+2, ' ') - << (void*)Op.getNode() << ": <multiple use>"; + } - dbgs() << '\n'; dbgs().indent(indent); N->dump(G); } void SelectionDAG::dump() const { - dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; + dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n"; for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) { - const SDNode *N = I; - if (!N->hasOneUse() && N != getRoot().getNode()) + const SDNode *N = &*I; + if (!N->hasOneUse() && N != getRoot().getNode() && + (!shouldPrintInline(*N) || N->use_empty())) DumpNodes(N, 2, this); } @@ -573,10 +606,30 @@ void SelectionDAG::dump() const { } void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + OS << PrintNodeId(*this) << ": "; print_types(OS, G); + OS << " = " << getOperationName(G); print_details(OS, G); } +static bool printOperand(raw_ostream &OS, const SelectionDAG *G, + const SDValue Value) { + if (!Value.getNode()) { + OS << "<null>"; + return false; + } else if (shouldPrintInline(*Value.getNode())) { + OS << Value->getOperationName(G) << ':'; + Value->print_types(OS, G); + Value->print_details(OS, G); + return true; + } else { + OS << PrintNodeId(*Value.getNode()); + if (unsigned RN = Value.getResNo()) + OS << ':' << RN; + return false; + } +} + typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, const SelectionDAG *G, VisitedSDNodeSet &once) { @@ -589,20 +642,13 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, // Having printed this SDNode, walk the children: for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - const SDNode *child = N->getOperand(i).getNode(); - if (i) OS << ","; OS << " "; - if (child->getNumOperands() == 0) { - // This child has no grandchildren; print it inline right here. - child->printr(OS, G); - once.insert(child); - } else { // Just the address. FIXME: also print the child's opcode. 
- OS << (const void*)child; - if (unsigned RN = N->getOperand(i).getResNo()) - OS << ":" << RN; - } + const SDValue Op = N->getOperand(i); + bool printedInline = printOperand(OS, G, Op); + if (printedInline) + once.insert(Op.getNode()); } OS << "\n"; @@ -664,12 +710,9 @@ void SDNode::dumprFull(const SelectionDAG *G) const { } void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { - print_types(OS, G); + printr(OS, G); for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { if (i) OS << ", "; else OS << " "; - OS << (void*)getOperand(i).getNode(); - if (unsigned RN = getOperand(i).getResNo()) - OS << ":" << RN; + printOperand(OS, G, getOperand(i)); } - print_details(OS, G); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 97ece8b9248a..853a21a15eb9 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" @@ -263,13 +264,17 @@ namespace llvm { return; IS.OptLevel = NewOptLevel; IS.TM.setOptLevel(NewOptLevel); - SavedFastISel = IS.TM.Options.EnableFastISel; - if (NewOptLevel == CodeGenOpt::None) - IS.TM.setFastISel(true); DEBUG(dbgs() << "\nChanging optimization level for Function " << IS.MF->getFunction()->getName() << "\n"); DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O" << NewOptLevel << "\n"); + SavedFastISel = IS.TM.Options.EnableFastISel; + if (NewOptLevel == CodeGenOpt::None) { + IS.TM.setFastISel(IS.TM.getO0WantsFastISel()); + DEBUG(dbgs() << "\tFastISel is " + << (IS.TM.Options.EnableFastISel ? 
"enabled" : "disabled") + << "\n"); + } } ~OptLevelChanger() { @@ -293,6 +298,11 @@ namespace llvm { const TargetLowering *TLI = IS->TLI; const TargetSubtargetInfo &ST = IS->MF->getSubtarget(); + // Try first to see if the Target has its own way of selecting a scheduler + if (auto *SchedulerCtor = ST.getDAGScheduler(OptLevel)) { + return SchedulerCtor(IS, OptLevel); + } + if (OptLevel == CodeGenOpt::None || (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) || TLI->getSchedulingPreference() == Sched::Source) @@ -350,8 +360,9 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, OptLevel(OL), DAGSize(0) { initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); - initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); - initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); + initializeBranchProbabilityInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); + initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); initializeTargetLibraryInfoWrapperPassPass( *PassRegistry::getPassRegistry()); } @@ -363,13 +374,12 @@ SelectionDAGISel::~SelectionDAGISel() { } void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<AliasAnalysis>(); - AU.addPreserved<AliasAnalysis>(); + AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<GCModuleInfo>(); AU.addPreserved<GCModuleInfo>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); if (UseMBPI && OptLevel != CodeGenOpt::None) - AU.addRequired<BranchProbabilityInfo>(); + AU.addRequired<BranchProbabilityInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -380,10 +390,10 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { /// /// This is required for correctness, so it must be done at -O0. /// -static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) { +static void SplitCriticalSideEffectEdges(Function &Fn) { // Loop for blocks with phi nodes. - for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - PHINode *PN = dyn_cast<PHINode>(BB->begin()); + for (BasicBlock &BB : Fn) { + PHINode *PN = dyn_cast<PHINode>(BB.begin()); if (!PN) continue; ReprocessBlock: @@ -391,7 +401,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) { // are potentially trapping constant expressions. Constant expressions are // the only potentially trapping value that can occur as the argument to a // PHI. - for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I) + for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I) for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i)); if (!CE || !CE->canTrap()) continue; @@ -405,8 +415,8 @@ static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) { // Okay, we have to split this edge. 
SplitCriticalEdge( - Pred->getTerminator(), GetSuccessorNumber(Pred, BB), - CriticalEdgeSplittingOptions(AA).setMergeIdenticalEdges()); + Pred->getTerminator(), GetSuccessorNumber(Pred, &BB), + CriticalEdgeSplittingOptions().setMergeIdenticalEdges()); goto ReprocessBlock; } } @@ -437,19 +447,19 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TII = MF->getSubtarget().getInstrInfo(); TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); - SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), AA); + SplitCriticalSideEffectEdges(const_cast<Function &>(Fn)); CurDAG->init(*MF); FuncInfo->set(Fn, *MF, CurDAG); if (UseMBPI && OptLevel != CodeGenOpt::None) - FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>(); + FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); else FuncInfo->BPI = nullptr; @@ -457,15 +467,50 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MF->setHasInlineAsm(false); + FuncInfo->SplitCSR = false; + SmallVector<MachineBasicBlock*, 4> Returns; + + // We split CSR if the target supports it for the given function + // and the function has only return exits. + if (TLI->supportSplitCSR(MF)) { + FuncInfo->SplitCSR = true; + + // Collect all the return blocks. + for (const BasicBlock &BB : Fn) { + if (!succ_empty(&BB)) + continue; + + const TerminatorInst *Term = BB.getTerminator(); + if (isa<UnreachableInst>(Term)) + continue; + if (isa<ReturnInst>(Term)) { + Returns.push_back(FuncInfo->MBBMap[&BB]); + continue; + } + + // Bail out if the exit block is not Return nor Unreachable. + FuncInfo->SplitCSR = false; + break; + } + } + + MachineBasicBlock *EntryMBB = &MF->front(); + if (FuncInfo->SplitCSR) + // This performs initialization so lowering for SplitCSR will be correct. + TLI->initializeSplitCSR(EntryMBB); + SelectAllBasicBlocks(Fn); // If the first basic block in the function has live ins that need to be // copied into vregs, emit the copies into the top of the block before // emitting the code for the block. - MachineBasicBlock *EntryMBB = MF->begin(); const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII); + // Insert copies in the entry block and the return blocks. + if (FuncInfo->SplitCSR) + TLI->insertCopiesSplitCSR(EntryMBB, Returns); + DenseMap<unsigned, unsigned> LiveInMap; if (!FuncInfo->ArgDbgValues.empty()) for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), @@ -882,7 +927,7 @@ void SelectionDAGISel::DoInstructionSelection() { // graph) and preceding back toward the beginning (the entry // node). while (ISelPosition != CurDAG->allnodes_begin()) { - SDNode *Node = --ISelPosition; + SDNode *Node = &*--ISelPosition; // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes, // but there are currently some corner cases that it misses. Also, this // makes it theoretically possible to disable the DAGCombiner. 
@@ -916,14 +961,47 @@ void SelectionDAGISel::DoInstructionSelection() { PostprocessISelDAG(); } +static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) { + for (const User *U : CPI->users()) { + if (const IntrinsicInst *EHPtrCall = dyn_cast<IntrinsicInst>(U)) { + Intrinsic::ID IID = EHPtrCall->getIntrinsicID(); + if (IID == Intrinsic::eh_exceptionpointer || + IID == Intrinsic::eh_exceptioncode) + return true; + } + } + return false; +} + /// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and /// do other setup for EH landing-pad blocks. bool SelectionDAGISel::PrepareEHLandingPad() { MachineBasicBlock *MBB = FuncInfo->MBB; - + const Constant *PersonalityFn = FuncInfo->Fn->getPersonalityFn(); + const BasicBlock *LLVMBB = MBB->getBasicBlock(); const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout())); + // Catchpads have one live-in register, which typically holds the exception + // pointer or code. + if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) { + if (hasExceptionPointerOrCodeUser(CPI)) { + // Get or create the virtual register to hold the pointer or code. Mark + // the live in physreg and copy into the vreg. + MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn); + assert(EHPhysReg && "target lacks exception pointer register"); + MBB->addLiveIn(EHPhysReg); + unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC); + BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), + TII->get(TargetOpcode::COPY), VReg) + .addReg(EHPhysReg, RegState::Kill); + } + return true; + } + + if (!LLVMBB->isLandingPad()) + return true; + // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. MCSymbol *Label = MF->getMMI().addLandingPad(MBB); @@ -935,52 +1013,12 @@ bool SelectionDAGISel::PrepareEHLandingPad() { BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); - // If this is an MSVC-style personality function, we need to split the landing - // pad into several BBs. - const BasicBlock *LLVMBB = MBB->getBasicBlock(); - const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst(); - MF->getMMI().addPersonality(MBB, cast<Function>(LPadInst->getParent() - ->getParent() - ->getPersonalityFn() - ->stripPointerCasts())); - EHPersonality Personality = MF->getMMI().getPersonalityType(); - - if (isMSVCEHPersonality(Personality)) { - SmallVector<MachineBasicBlock *, 4> ClauseBBs; - const IntrinsicInst *ActionsCall = - dyn_cast<IntrinsicInst>(LLVMBB->getFirstInsertionPt()); - // Get all invoke BBs that unwind to this landingpad. - SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(), - MBB->pred_end()); - if (ActionsCall && ActionsCall->getIntrinsicID() == Intrinsic::eh_actions) { - // If this is a call to llvm.eh.actions followed by indirectbr, then we've - // run WinEHPrepare, and we should remove this block from the machine CFG. - // Mark the targets of the indirectbr as landingpads instead. - for (const BasicBlock *LLVMSucc : successors(LLVMBB)) { - MachineBasicBlock *ClauseBB = FuncInfo->MBBMap[LLVMSucc]; - // Add the edge from the invoke to the clause. - for (MachineBasicBlock *InvokeBB : InvokeBBs) - InvokeBB->addSuccessor(ClauseBB); - - // Mark the clause as a landing pad or MI passes will delete it. - ClauseBB->setIsLandingPad(); - } - } - - // Remove the edge from the invoke to the lpad. 
- for (MachineBasicBlock *InvokeBB : InvokeBBs) - InvokeBB->removeSuccessor(MBB); - - // Don't select instructions for the landingpad. - return false; - } - // Mark exception register as live in. - if (unsigned Reg = TLI->getExceptionPointerRegister()) + if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn)) FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); // Mark exception selector register as live in. - if (unsigned Reg = TLI->getExceptionSelectorRegister()) + if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn)) FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); return true; @@ -992,9 +1030,9 @@ bool SelectionDAGISel::PrepareEHLandingPad() { static bool isFoldedOrDeadInstruction(const Instruction *I, FunctionLoweringInfo *FuncInfo) { return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. - !isa<TerminatorInst>(I) && // Terminators aren't folded. + !isa<TerminatorInst>(I) && // Terminators aren't folded. !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. - !isa<LandingPadInst>(I) && // Landingpad instructions aren't folded. + !I->isEHPad() && // EH pad instructions aren't folded. !FuncInfo->isExportedInst(I); // Exported instrs must be computed. } @@ -1143,17 +1181,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FuncInfo->VisitedBBs.insert(LLVMBB); } - BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI(); + BasicBlock::const_iterator const Begin = + LLVMBB->getFirstNonPHI()->getIterator(); BasicBlock::const_iterator const End = LLVMBB->end(); BasicBlock::const_iterator BI = End; FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; + if (!FuncInfo->MBB) + continue; // Some blocks like catchpads have no code or MBB. FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); // Setup an EH landing-pad block. FuncInfo->ExceptionPointerVirtReg = 0; FuncInfo->ExceptionSelectorVirtReg = 0; - if (LLVMBB->isLandingPad()) + if (LLVMBB->isEHPad()) if (!PrepareEHLandingPad()) continue; @@ -1192,7 +1233,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { unsigned NumFastIselRemaining = std::distance(Begin, End); // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { - const Instruction *Inst = std::prev(BI); + const Instruction *Inst = &*std::prev(BI); // If we no longer require this instruction, skip it. if (isFoldedOrDeadInstruction(Inst, FuncInfo)) { @@ -1212,8 +1253,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // then see if there is a load right before the selected instructions. // Try to fold the load if so. const Instruction *BeforeInst = Inst; - while (BeforeInst != Begin) { - BeforeInst = std::prev(BasicBlock::const_iterator(BeforeInst)); + while (BeforeInst != &*Begin) { + BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst)); if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) break; } @@ -1245,7 +1286,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // For the purpose of debugging, just abort. 
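Editorial note: the FastISel loop in this hunk walks the basic block backwards and, before selecting an instruction, skips over instructions that were already folded or are dead to see whether a load sits just above it and can be folded into the selection. A standalone sketch of that backwards skip follows; Instr, FoldedOrDead, and findFoldableLoadBefore are hypothetical simplifications of the real iterator-based walk.

#include <cassert>
#include <vector>

// Hypothetical instruction model: only what the scan below needs.
struct Instr {
  bool IsLoad;
  bool FoldedOrDead; // stands in for isFoldedOrDeadInstruction()
};

// Starting from the instruction *before* Pos, skip folded/dead instructions
// and report whether the first remaining one is a load worth folding.
static const Instr *findFoldableLoadBefore(const std::vector<Instr> &BBInsts,
                                           size_t Pos) {
  while (Pos != 0) {
    const Instr &Before = BBInsts[--Pos];
    if (!Before.FoldedOrDead)
      return Before.IsLoad ? &Before : nullptr;
  }
  return nullptr; // reached the top of the block
}

int main() {
  //                       load           folded cmp     current instr
  std::vector<Instr> BB = {{true, false}, {false, true}, {false, false}};
  assert(findFoldableLoadBefore(BB, 2) == &BB[0]);
  return 0;
}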
report_fatal_error("FastISel didn't select the entire block"); - if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) { + if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() && + !Inst->use_empty()) { unsigned &R = FuncInfo->ValueMap[Inst]; if (!R) R = FuncInfo->CreateRegs(Inst->getType()); @@ -1253,7 +1295,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { bool HadTailCall = false; MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt; - SelectBasicBlock(Inst, BI, HadTailCall); + SelectBasicBlock(Inst->getIterator(), BI, HadTailCall); // If the call was emitted as a tail call, we're done with the block. // We also need to delete any previously emitted instructions. @@ -1483,35 +1525,39 @@ SelectionDAGISel::FinishBasicBlock() { CodeGenAndEmitDAG(); } - uint32_t UnhandledWeight = 0; - for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) - UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight; - + BranchProbability UnhandledProb = SDB->BitTestCases[i].Prob; for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { - UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight; + UnhandledProb -= SDB->BitTestCases[i].Cases[j].ExtraProb; // Set the current basic block to the mbb we wish to insert the code into FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code - if (j+1 != ej) - SDB->visitBitTestCase(SDB->BitTestCases[i], - SDB->BitTestCases[i].Cases[j+1].ThisBB, - UnhandledWeight, - SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j], - FuncInfo->MBB); + + // If all cases cover a contiguous range, it is not necessary to jump to + // the default block after the last bit test fails. This is because the + // range check during bit test header creation has guaranteed that every + // case here doesn't go outside the range. + MachineBasicBlock *NextMBB; + if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + NextMBB = SDB->BitTestCases[i].Cases[j + 1].TargetBB; + else if (j + 1 != ej) + NextMBB = SDB->BitTestCases[i].Cases[j + 1].ThisBB; else - SDB->visitBitTestCase(SDB->BitTestCases[i], - SDB->BitTestCases[i].Default, - UnhandledWeight, - SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j], - FuncInfo->MBB); + NextMBB = SDB->BitTestCases[i].Default; + SDB->visitBitTestCase(SDB->BitTestCases[i], + NextMBB, + UnhandledProb, + SDB->BitTestCases[i].Reg, + SDB->BitTestCases[i].Cases[j], + FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); + + if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + break; } // Update PHI Nodes @@ -1642,14 +1688,7 @@ SelectionDAGISel::FinishBasicBlock() { /// one preferred by the target. /// ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { - RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault(); - - if (!Ctor) { - Ctor = ISHeuristic; - RegisterScheduler::setDefault(Ctor); - } - - return Ctor(this, OptLevel); + return ISHeuristic(this, OptLevel); } //===----------------------------------------------------------------------===// @@ -1961,7 +2000,7 @@ SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { } /// GetVBR - decode a vbr encoding whose top bit is set. -LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { assert(Val >= 128 && "Not a VBR"); Val &= 127; // Remove first vbr bit. 
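Editorial note: GetVBR above decodes the matcher table's variable-width integer encoding, in which each byte carries seven payload bits and the high bit marks continuation. The following self-contained encoder/decoder pair illustrates the format only; it is a sketch of the scheme, not of the generated matcher tables themselves.

#include <cassert>
#include <cstdint>
#include <vector>

// Encode Val using 7 payload bits per byte; bit 7 set means "more bytes follow".
static void encodeVBR(uint64_t Val, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Val & 127;
    Val >>= 7;
    if (Val)
      Byte |= 128;
    Out.push_back(Byte);
  } while (Val);
}

// Decode starting at Idx; mirrors the loop structure of GetVBR.
static uint64_t decodeVBR(const std::vector<uint8_t> &Table, unsigned &Idx) {
  uint64_t Val = 0;
  unsigned Shift = 0;
  uint8_t Byte;
  do {
    Byte = Table[Idx++];
    Val |= uint64_t(Byte & 127) << Shift;
    Shift += 7;
  } while (Byte & 128);
  return Val;
}

int main() {
  std::vector<uint8_t> Table;
  encodeVBR(300, Table); // 300 = 0b1'0010'1100 -> two bytes: 172, 2
  unsigned Idx = 0;
  assert(decodeVBR(Table, Idx) == 300 && Idx == 2);
  return 0;
}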
@@ -2287,7 +2326,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, } /// CheckSame - Implements OP_CheckSame. -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) { @@ -2298,7 +2337,7 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, } /// CheckChildSame - Implements OP_CheckChildXSame. -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes, @@ -2310,20 +2349,20 @@ CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, } /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, const SelectionDAGISel &SDISel) { return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); } /// CheckNodePredicate - Implements OP_CheckNodePredicate. -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, const SelectionDAGISel &SDISel, SDNode *N) { return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDNode *N) { uint16_t Opc = MatcherTable[MatcherIndex++]; @@ -2331,7 +2370,7 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, return N->getOpcode() == Opc; } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; @@ -2341,7 +2380,7 @@ CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL, unsigned ChildNo) { @@ -2351,14 +2390,14 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, DL); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { return cast<CondCodeSDNode>(N)->get() == (ISD::CondCode)MatcherTable[MatcherIndex++]; } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; @@ -2369,7 +2408,7 @@ CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool 
CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { int64_t Val = MatcherTable[MatcherIndex++]; @@ -2380,7 +2419,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, return C && C->getSExtValue() == Val; } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, unsigned ChildNo) { if (ChildNo >= N.getNumOperands()) @@ -2388,7 +2427,7 @@ CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo)); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; @@ -2401,7 +2440,7 @@ CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, return C && SDISel.CheckAndMask(N.getOperand(0), C, Val); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 4df5ede388fc..2764688518c2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -80,9 +80,16 @@ namespace llvm { return true; } - static bool hasNodeAddressLabel(const SDNode *Node, - const SelectionDAG *Graph) { - return true; + static std::string getNodeIdentifierLabel(const SDNode *Node, + const SelectionDAG *Graph) { + std::string R; + raw_string_ostream OS(R); +#ifndef NDEBUG + OS << 't' << Node->PersistentId; +#else + OS << static_cast<const void *>(Node); +#endif + return R; } /// If you want to override the dot attributes printed for a particular diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 34688df4765b..050ec2116c5d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -95,6 +96,9 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType); const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); + auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo(); + MFI->markAsStatepointSpillSlotObjectIndex(FI); + Builder.FuncInfo.StatepointStackSlots.push_back(FI); AllocatedStackSlots.push_back(true); return SpillSlot; @@ -105,8 +109,8 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, return Builder.DAG.getFrameIndex(FI, ValueType); } // Note: We deliberately choose to advance this only on the failing path. - // Doing so on the suceeding path involes a bit of complexity that caused a - // minor bug previously. 
Unless performance shows this matters, please + // Doing so on the succeeding path involves a bit of complexity that caused + // a minor bug previously. Unless performance shows this matters, please // keep this code as simple as possible. NextSlotToAllocate++; } @@ -119,7 +123,7 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, static Optional<int> findPreviousSpillSlot(const Value *Val, SelectionDAGBuilder &Builder, int LookUpDepth) { - // Can not look any futher - give up now + // Can not look any further - give up now if (LookUpDepth <= 0) return Optional<int>(); @@ -196,7 +200,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, /// Try to find existing copies of the incoming values in stack slots used for /// statepoint spilling. If we can find a spill slot for the incoming value, /// mark that slot as allocated, and reuse the same slot for this safepoint. -/// This helps to avoid series of loads and stores that only serve to resuffle +/// This helps to avoid series of loads and stores that only serve to reshuffle /// values on the stack between calls. static void reservePreviousStackSlotForValue(const Value *IncomingValue, SelectionDAGBuilder &Builder) { @@ -255,7 +259,7 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases, SmallVectorImpl<const Value *> &Relocs, SelectionDAGBuilder &Builder) { - // This is horribly ineffecient, but I don't care right now + // This is horribly inefficient, but I don't care right now SmallSet<SDValue, 64> Seen; SmallVector<const Value *, 64> NewBases, NewPtrs, NewRelocs; @@ -283,13 +287,29 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases, /// call node. Also update NodeMap so that getValue(statepoint) will /// reference lowered call result static SDNode * -lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, +lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB, SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) { ImmutableCallSite CS(ISP.getCallSite()); - SDValue ActualCallee = Builder.getValue(ISP.getCalledValue()); + SDValue ActualCallee; + + if (ISP.getNumPatchBytes() > 0) { + // If we've been asked to emit a nop sequence instead of a call instruction + // for this statepoint then don't lower the call target, but use a constant + // `null` instead. Not lowering the call target lets statepoint clients get + // away without providing a physical address for the symbolic call target at + // link time. 
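Editorial note: the comment this hunk adds explains why a statepoint with a non-zero num-patch-bytes does not lower its real call target: the call site is emitted as a nop sled, so a constant null pointer suffices and the symbolic target never has to resolve at link time. A tiny sketch of that selection logic, with a hypothetical CalleeKind in place of the SelectionDAG values actually built:

#include <cassert>
#include <cstdint>

// Hypothetical model of the two ways the callee operand can be produced.
enum class CalleeKind { LoweredValue, NullConstant };

// If the statepoint asks for NumPatchBytes of nops instead of a real call,
// there is no need to materialize the callee at all; a null constant of the
// right pointer type (and address space) is enough.
static CalleeKind pickStatepointCallee(uint32_t NumPatchBytes) {
  return NumPatchBytes > 0 ? CalleeKind::NullConstant
                           : CalleeKind::LoweredValue;
}

int main() {
  assert(pickStatepointCallee(0) == CalleeKind::LoweredValue);
  assert(pickStatepointCallee(8) == CalleeKind::NullConstant);
  return 0;
}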
+ + const auto &TLI = Builder.DAG.getTargetLoweringInfo(); + const auto &DL = Builder.DAG.getDataLayout(); + + unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace(); + ActualCallee = Builder.DAG.getConstant(0, Builder.getCurSDLoc(), + TLI.getPointerTy(DL, AS)); + } else + ActualCallee = Builder.getValue(ISP.getCalledValue()); assert(CS.getCallingConv() != CallingConv::AnyReg && "anyregcc is not supported on statepoints!"); @@ -300,7 +320,7 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, SDValue ReturnValue, CallEndVal; std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands( ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos, - ISP.getNumCallArgs(), ActualCallee, DefTy, LandingPad, + ISP.getNumCallArgs(), ActualCallee, DefTy, EHPadBB, false /* IsPatchPoint */); SDNode *CallEnd = CallEndVal.getNode(); @@ -317,25 +337,33 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, // ch, glue = callseq_end ch, glue // get_return_value ch, glue // - // get_return_value can either be a CopyFromReg to grab the return value from - // %RAX, or it can be a LOAD to load a value returned by reference via a stack - // slot. + // get_return_value can either be a sequence of CopyFromReg instructions + // to grab the return value from the return register(s), or it can be a LOAD + // to load a value returned by reference via a stack slot. - if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg || - CallEnd->getOpcode() == ISD::LOAD)) - CallEnd = CallEnd->getOperand(0).getNode(); + if (HasDef) { + if (CallEnd->getOpcode() == ISD::LOAD) + CallEnd = CallEnd->getOperand(0).getNode(); + else + while (CallEnd->getOpcode() == ISD::CopyFromReg) + CallEnd = CallEnd->getOperand(0).getNode(); + } assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!"); - if (HasDef) { - if (CS.isInvoke()) { - // Result value will be used in different basic block for invokes - // so we need to export it now. But statepoint call has a different type - // than the actuall call. It means that standart exporting mechanism will - // create register of the wrong type. So instead we need to create - // register with correct type and save value into it manually. + // Export the result value if needed + const Instruction *GCResult = ISP.getGCResult(); + if (HasDef && GCResult) { + if (GCResult->getParent() != CS.getParent()) { + // Result value will be used in a different basic block so we need to + // export it now. + // Default exporting mechanism will not work here because statepoint call + // has a different type than the actual call. It means that by default + // llvm will create export register of the wrong type (always i32 in our + // case). So instead we need to create export register with correct type + // manually. // TODO: To eliminate this problem we can remove gc.result intrinsics - // completelly and make statepoint call to return a tuple. + // completely and make statepoint call to return a tuple. unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType()); RegsForValue RFV( *Builder.DAG.getContext(), Builder.DAG.getTargetLoweringInfo(), @@ -347,8 +375,9 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, PendingExports.push_back(Chain); Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg; } else { - // The value of the statepoint itself will be the value of call itself. - // We'll replace the actually call node shortly. 
gc_result will grab + // Result value will be used in a same basic block. Don't export it or + // perform any explicit register copies. + // We'll replace the actuall call node shortly. gc_result will grab // this value. Builder.setValue(CS.getInstruction(), ReturnValue); } @@ -411,7 +440,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // chaining stores one after another, this may allow // a bit more optimal scheduling for them Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, - MachinePointerInfo::getFixedStack(Index), + MachinePointerInfo::getFixedStack( + Builder.DAG.getMachineFunction(), Index), false, false, 0); Builder.StatepointLowering.setLocation(Incoming, Loc); @@ -483,21 +513,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // to the GCStrategy from there (yet). GCStrategy &S = Builder.GFI->getStrategy(); for (const Value *V : Bases) { - auto Opt = S.isGCManagedPointer(V); + auto Opt = S.isGCManagedPointer(V->getType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed base pointer found in statepoint"); } } for (const Value *V : Ptrs) { - auto Opt = S.isGCManagedPointer(V); + auto Opt = S.isGCManagedPointer(V->getType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed derived pointer found in statepoint"); } } for (const Value *V : Relocations) { - auto Opt = S.isGCManagedPointer(V); + auto Opt = S.isGCManagedPointer(V->getType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed pointer relocated"); } @@ -581,19 +611,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex(); } else { // Record value as visited, but not spilled. This is case for allocas - // and constants. For this values we can avoid emiting spill load while + // and constants. For this values we can avoid emitting spill load while // visiting corresponding gc_relocate. // Actually we do not need to record them in this map at all. - // We do this only to check that we are not relocating any unvisited value. + // We do this only to check that we are not relocating any unvisited + // value. SpillMap[V] = None; // Default llvm mechanisms for exporting values which are used in // different basic blocks does not work for gc relocates. // Note that it would be incorrect to teach llvm that all relocates are - // uses of the corresponging values so that it would automatically + // uses of the corresponding values so that it would automatically // export them. Relocates of the spilled values does not use original // value. - if (StatepointSite.getCallSite().isInvoke()) + if (RelocateOpers.getUnderlyingCallSite().getParent() != + StatepointInstr->getParent()) Builder.ExportFromCurrentBlock(V); } } @@ -608,7 +640,7 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { } void SelectionDAGBuilder::LowerStatepoint( - ImmutableStatepoint ISP, MachineBasicBlock *LandingPad /*=nullptr*/) { + ImmutableStatepoint ISP, const BasicBlock *EHPadBB /*= nullptr*/) { // The basic scheme here is that information about both the original call and // the safepoint is encoded in the CallInst. We create a temporary call and // lower it, then reverse engineer the calling sequence. @@ -620,14 +652,12 @@ void SelectionDAGBuilder::LowerStatepoint( ImmutableCallSite CS(ISP.getCallSite()); #ifndef NDEBUG - // Consistency check. Don't do this for invokes. 
It would be too - // expensive to preserve this information across different basic blocks - if (!CS.isInvoke()) { - for (const User *U : CS->users()) { - const CallInst *Call = cast<CallInst>(U); - if (isGCRelocate(Call)) - StatepointLowering.scheduleRelocCall(*Call); - } + // Consistency check. Check only relocates in the same basic block as thier + // statepoint. + for (const User *U : CS->users()) { + const CallInst *Call = cast<CallInst>(U); + if (isGCRelocate(Call) && Call->getParent() == CS.getParent()) + StatepointLowering.scheduleRelocCall(*Call); } #endif @@ -648,7 +678,7 @@ void SelectionDAGBuilder::LowerStatepoint( // Get call node, we will replace it later with statepoint SDNode *CallNode = - lowerCallFromStatepoint(ISP, LandingPad, *this, PendingExports); + lowerCallFromStatepoint(ISP, EHPadBB, *this, PendingExports); // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END // nodes with all the appropriate arguments and return values. @@ -790,7 +820,7 @@ void SelectionDAGBuilder::LowerStatepoint( // Replace original call DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root - // Remove originall call node + // Remove original call node DAG.DeleteNode(CallNode); // DON'T set the root - under the assumption that it's already set past the @@ -809,8 +839,9 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) { Instruction *I = cast<Instruction>(CI.getArgOperand(0)); assert(isStatepoint(I) && "first argument must be a statepoint token"); - if (isa<InvokeInst>(I)) { - // For invokes we should have stored call result in a virtual register. + if (I->getParent() != CI.getParent()) { + // Statepoint is in different basic block so we should have stored call + // result in a virtual register. // We can not use default getValue() functionality to copy value from this // register because statepoint and actuall call return types can be // different, and getValue() will use CopyFromReg of the wrong type, @@ -833,9 +864,10 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { #ifndef NDEBUG // Consistency check - // We skip this check for invoke statepoints. It would be too expensive to - // preserve validation info through different basic blocks. - if (!RelocateOpers.isTiedToInvoke()) { + // We skip this check for relocates not in the same basic block as thier + // statepoint. It would be too expensive to preserve validation info through + // different basic blocks. + if (RelocateOpers.getStatepoint()->getParent() == CI.getParent()) { StatepointLowering.relocCallVisited(CI); } #endif @@ -862,13 +894,14 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { // Be conservative: flush all pending loads // TODO: Probably we can be less restrictive on this, - // it may allow more scheduling opprtunities + // it may allow more scheduling opportunities. 
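Editorial note: the statepoint lowering above records, for every gc pointer live across the call, either the frame index it was spilled to or the fact that it needs no spill (allocas and constants), and the matching gc.relocate later reloads from that slot. The sketch below models only that bookkeeping; it keys the map by a string name purely for illustration, whereas the real code keys FunctionLoweringInfo's map by Value* and stores Optional<int> frame indices.

#include <cassert>
#include <map>
#include <optional>
#include <string>

// Hypothetical bookkeeping: each gc pointer maps either to the frame index it
// was spilled to, or to "visited but not spilled" (no reload required).
using SpillMap = std::map<std::string, std::optional<int>>;

static void recordSpill(SpillMap &M, const std::string &V, int FrameIndex) {
  M[V] = FrameIndex;
}
static void recordNotSpilled(SpillMap &M, const std::string &V) {
  M[V] = std::nullopt;
}

// At the gc.relocate: relocating a value the statepoint never saw is a bug.
static std::optional<int> lookupRelocate(const SpillMap &M,
                                         const std::string &V) {
  auto It = M.find(V);
  assert(It != M.end() && "relocating an unvisited value");
  return It->second; // nullopt => reuse the original value, no reload
}

int main() {
  SpillMap M;
  recordSpill(M, "derived.ptr", /*FrameIndex=*/3);
  recordNotSpilled(M, "some.alloca");
  assert(lookupRelocate(M, "derived.ptr") == 3);
  assert(!lookupRelocate(M, "some.alloca").has_value());
  return 0;
}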
SDValue Chain = getRoot(); SDValue SpillLoad = - DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, - MachinePointerInfo::getFixedStack(*DerivedPtrLocation), - false, false, false, 0); + DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), + *DerivedPtrLocation), + false, false, false, 0); // Again, be conservative, don't emit pending loads DAG.setRoot(SpillLoad.getValue(1)); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index fbf651277c7f..c64d882d69a4 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -85,21 +85,22 @@ void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, std::pair<SDValue, SDValue> TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, - const SDValue *Ops, unsigned NumOps, + ArrayRef<SDValue> Ops, bool isSigned, SDLoc dl, bool doesNotReturn, bool isReturnValueUsed) const { TargetLowering::ArgListTy Args; - Args.reserve(NumOps); + Args.reserve(Ops.size()); TargetLowering::ArgListEntry Entry; - for (unsigned i = 0; i != NumOps; ++i) { - Entry.Node = Ops[i]; + for (SDValue Op : Ops) { + Entry.Node = Op; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned); - Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned); + Entry.isSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); + Entry.isZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); Args.push_back(Entry); } + if (LC == RTLIB::UNKNOWN_LIBCALL) report_fatal_error("Unsupported library call operation!"); SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), @@ -115,9 +116,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, return LowerCallTo(CLI); } - -/// SoftenSetCCOperands - Soften the operands of a comparison. This code is -/// shared among BR_CC, SELECT_CC, and SETCC handlers. +/// Soften the operands of a comparison. This code is shared among BR_CC, +/// SELECT_CC, and SETCC handlers. void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, @@ -127,6 +127,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, // Expand into one or more soft-fp libcall(s). RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; + bool ShouldInvertCC = false; switch (CCCode) { case ISD::SETEQ: case ISD::SETOEQ: @@ -166,34 +167,38 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : (VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128; break; - default: + case ISD::SETONE: + // SETONE = SETOLT | SETOGT + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : + (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + break; + case ISD::SETUEQ: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128; + LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : + (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + break; + default: + // Invert CC for unordered comparisons + ShouldInvertCC = true; switch (CCCode) { - case ISD::SETONE: - // SETONE = SETOLT | SETOGT - LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? 
RTLIB::OLT_F64 : RTLIB::OLT_F128; - // Fallthrough - case ISD::SETUGT: - LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : - (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; - break; - case ISD::SETUGE: - LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : - (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; - break; case ISD::SETULT: - LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : + (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; break; case ISD::SETULE: - LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : + LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : + (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + break; + case ISD::SETUGT: + LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128; break; - case ISD::SETUEQ: - LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : - (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + case ISD::SETUGE: + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; break; default: llvm_unreachable("Do not know how to soften this setcc!"); } @@ -201,17 +206,21 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, // Use the target specific return value for comparions lib calls. EVT RetVT = getCmpLibcallReturnType(); - SDValue Ops[2] = { NewLHS, NewRHS }; - NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, + SDValue Ops[2] = {NewLHS, NewRHS}; + NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/, dl).first; NewRHS = DAG.getConstant(0, dl, RetVT); + CCCode = getCmpLibcallCC(LC1); + if (ShouldInvertCC) + CCCode = getSetCCInverse(CCCode, /*isInteger=*/true); + if (LC2 != RTLIB::UNKNOWN_LIBCALL) { SDValue Tmp = DAG.getNode( ISD::SETCC, dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, + NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/, dl).first; NewLHS = DAG.getNode( ISD::SETCC, dl, @@ -222,9 +231,8 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, } } -/// getJumpTableEncoding - Return the entry encoding for a jump table in the -/// current function. The returned value is a member of the -/// MachineJumpTableInfo::JTEntryKind enum. +/// Return the entry encoding for a jump table in the current function. The +/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. unsigned TargetLowering::getJumpTableEncoding() const { // In non-pic modes, just use the address of a block. if (getTargetMachine().getRelocationModel() != Reloc::PIC_) @@ -250,9 +258,8 @@ SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, return Table; } -/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the -/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an -/// MCExpr. +/// This returns the relocation base for the given PIC jumptable, the same as +/// getPICJumpTableRelocBase, but as an MCExpr. const MCExpr * TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,MCContext &Ctx) const{ @@ -279,10 +286,9 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // Optimization Methods //===----------------------------------------------------------------------===// -/// ShrinkDemandedConstant - Check to see if the specified operand of the -/// specified instruction is a constant integer. 
If so, check to see if there -/// are any bits set in the constant that are not demanded. If so, shrink the -/// constant and return true. +/// Check to see if the specified operand of the specified instruction is a +/// constant integer. If so, check to see if there are any bits set in the +/// constant that are not demanded. If so, shrink the constant and return true. bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded) { SDLoc dl(Op); @@ -317,10 +323,9 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, return false; } -/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the -/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening -/// cast, but it could be generalized for targets with other types of -/// implicit widening casts. +/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. +/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be +/// generalized for targets with other types of implicit widening casts. bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, @@ -366,13 +371,13 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, return false; } -/// SimplifyDemandedBits - Look at Op. At this point, we know that only the -/// DemandedMask bits of the result of Op are ever used downstream. If we can -/// use this information to simplify Op, create a new simplified DAG node and -/// return true, returning the original and new nodes in Old and New. Otherwise, -/// analyze the expression and return a mask of KnownOne and KnownZero bits for -/// the expression (used to simplify the caller). The KnownZero/One bits may -/// only be accurate for those bits in the DemandedMask. +/// Look at Op. At this point, we know that only the DemandedMask bits of the +/// result of Op are ever used downstream. If we can use this information to +/// simplify Op, create a new simplified DAG node and return true, returning the +/// original and new nodes in Old and New. Otherwise, analyze the expression and +/// return a mask of KnownOne and KnownZero bits for the expression (used to +/// simplify the caller). The KnownZero/One bits may only be accurate for those +/// bits in the DemandedMask. bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, APInt &KnownZero, @@ -1061,7 +1066,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); - if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) { + if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() && + Op.getOperand(0).getValueType() != MVT::f128) { + // Cannot eliminate/lower SHL for f128 yet. EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32; // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. @@ -1120,9 +1127,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return false; } -/// computeKnownBitsForTargetNode - Determine which of the bits specified -/// in Mask are known to be either zero or one and return them in the -/// KnownZero/KnownOne bitsets. +/// Determine which of the bits specified in Mask are known to be either zero or +/// one and return them in the KnownZero/KnownOne bitsets. 
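Editorial note: computeKnownBitsForTargetNode reports, per bit position, whether that bit of the result is known to be zero (KnownZero) or known to be one (KnownOne); the two masks must never overlap. The following small worked example uses plain 8-bit integers rather than APInt, and brute-forces the invariant for the expression (x | 1) & 0x0F.

#include <cassert>
#include <cstdint>

struct KnownBits8 {
  uint8_t Zero = 0; // bit set => that result bit is definitely 0
  uint8_t One = 0;  // bit set => that result bit is definitely 1
};

// Known bits of (x | 1) & 0x0F for an arbitrary unknown x:
//  - AND with 0x0F forces bits 4..7 to zero,
//  - OR with 1 forces bit 0 to one,
//  - bits 1..3 depend on x and stay unknown.
static KnownBits8 knownBitsOfExample() {
  KnownBits8 K;
  K.Zero = 0xF0;
  K.One = 0x01;
  return K;
}

int main() {
  KnownBits8 K = knownBitsOfExample();
  assert((K.Zero & K.One) == 0 && "a bit cannot be both known 0 and known 1");
  for (unsigned x = 0; x < 256; ++x) {
    uint8_t R = (uint8_t(x) | 1) & 0x0F;
    assert((R & K.Zero) == 0);    // known-zero bits really are zero
    assert((R & K.One) == K.One); // known-one bits really are one
  }
  return 0;
}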
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, @@ -1137,9 +1143,8 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); } -/// ComputeNumSignBitsForTargetNode - This method can be implemented by -/// targets that want to expose additional information about sign bits to the -/// DAG Combiner. +/// This method can be implemented by targets that want to expose additional +/// information about sign bits to the DAG Combiner. unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &, unsigned Depth) const { @@ -1152,10 +1157,8 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, return 1; } -/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly -/// one bit set. This differs from computeKnownBits in that it doesn't need to -/// determine which bit is set. -/// +/// Test if the given value is known to have exactly one bit set. This differs +/// from computeKnownBits in that it doesn't need to determine which bit is set. static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // A left-shift of a constant one will have exactly one bit set, because // shifting the bit off the end is undefined. @@ -1239,8 +1242,8 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const { return CN->isNullValue(); } -/// SimplifySetCC - Try to simplify a setcc built with the specified operands -/// and cc. If it is unable to simplify it, return a null SDValue. +/// Try to simplify a setcc built with the specified operands and cc. If it is +/// unable to simplify it, return a null SDValue. SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, @@ -1270,7 +1273,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); - if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an @@ -1335,7 +1338,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, PreExt = N0->getOperand(0); } else if (N0->getOpcode() == ISD::AND) { // DAGCombine turns costly ZExts into ANDs - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) + if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) if ((C->getAPIntValue()+1).isPowerOf2()) { MinBits = C->getAPIntValue().countTrailingOnes(); PreExt = N0->getOperand(0); @@ -1345,7 +1348,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, MinBits = N0->getOperand(0).getValueSizeInBits(); PreExt = N0->getOperand(0); Signed = true; - } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) { + } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) { // ZEXTLOAD / SEXTLOAD if (LN0->getExtensionType() == ISD::ZEXTLOAD) { MinBits = LN0->getMemoryVT().getSizeInBits(); @@ -1697,8 +1700,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && N0.getOpcode() == ISD::AND) { auto &DL = DAG.getDataLayout(); - if (ConstantSDNode *AndRHS = - dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { EVT ShiftTy = DCI.isBeforeLegalize() ? 
getPointerTy(DL) : getShiftAmountTy(N0.getValueType(), DL); @@ -1728,8 +1730,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (X & -256) == 256 -> (X >> 8) == 1 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && N0.getOpcode() == ISD::AND && N0.hasOneUse()) { - if (ConstantSDNode *AndRHS = - dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { const APInt &AndRHSC = AndRHS->getAPIntValue(); if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); @@ -1783,7 +1784,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Constant fold or commute setcc. SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl); if (O.getNode()) return O; - } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) { + } else if (auto *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) { // If the RHS of an FP comparison is a constant, simplify it away in // some cases. if (CFP->getValueAPF().isNaN()) { @@ -1900,8 +1901,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // to be careful about increasing register pressure needlessly. bool LegalRHSImm = false; - if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) { - if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) { + if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { // Turn (X+C1) == C2 --> X == C2-C1 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) { return DAG.getSetCC(dl, VT, N0.getOperand(0), @@ -1924,7 +1925,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } // Turn (C1-X) == C2 --> X == C1-C2 - if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { + if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { return DAG.getSetCC(dl, VT, N0.getOperand(1), @@ -2075,12 +2076,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return SDValue(); } -/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the -/// node is a GlobalAddress + offset. +/// Returns true (and the GlobalValue and the offset) if the node is a +/// GlobalAddress + offset. bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const { - if (isa<GlobalAddressSDNode>(N)) { - GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N); + if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) { GA = GASD->getGlobal(); Offset += GASD->getOffset(); return true; @@ -2090,14 +2090,12 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, SDValue N1 = N->getOperand(0); SDValue N2 = N->getOperand(1); if (isGAPlusOffset(N1.getNode(), GA, Offset)) { - ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); - if (V) { + if (auto *V = dyn_cast<ConstantSDNode>(N2)) { Offset += V->getSExtValue(); return true; } } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) { - ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); - if (V) { + if (auto *V = dyn_cast<ConstantSDNode>(N1)) { Offset += V->getSExtValue(); return true; } @@ -2107,9 +2105,8 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, return false; } - -SDValue TargetLowering:: -PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { +SDValue TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { // Default implementation: no optimization. 
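Editorial note: one of the SimplifySetCC rewrites touched above turns an equality test against a masked value into a shift, e.g. (X & -256) == 256 into (X >> 8) == 1. This works because -256, viewed as a mask, keeps exactly the bits at or above the shift amount, and 256 has only bit 8 set. The brute-force check below verifies the equivalence over a small unsigned domain; it is an illustration of the identity, not of the DAG node construction.

#include <cassert>
#include <cstdint>

int main() {
  // (X & -256) == 256  <=>  (X >> 8) == 1, for unsigned X and logical shift.
  for (uint32_t X = 0; X < (1u << 16); ++X) {
    bool Masked = (X & ~255u) == 256u; // -256 as a 32-bit mask is ~255
    bool Shifted = (X >> 8) == 1u;
    assert(Masked == Shifted);
  }
  return 0;
}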
return SDValue(); } @@ -2159,9 +2156,9 @@ TargetLowering::getConstraintType(StringRef Constraint) const { return C_Unknown; } -/// LowerXConstraint - try to replace an X constraint, which matches anything, -/// with another that has more specific requirements based on the type of the -/// corresponding operand. +/// Try to replace an X constraint, which matches anything, with another that +/// has more specific requirements based on the type of the corresponding +/// operand. const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ if (ConstraintVT.isInteger()) return "r"; @@ -2170,8 +2167,8 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ return nullptr; } -/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops -/// vector. If it is invalid, don't add anything to Ops. +/// Lower the specified operand into the Ops vector. +/// If it is invalid, don't add anything to Ops. void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, @@ -2284,31 +2281,30 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, //===----------------------------------------------------------------------===// // Constraint Selection. -/// isMatchingInputConstraint - Return true of this is an input operand that is -/// a matching constraint like "4". +/// Return true of this is an input operand that is a matching constraint like +/// "4". bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const { assert(!ConstraintCode.empty() && "No known constraint!"); return isdigit(static_cast<unsigned char>(ConstraintCode[0])); } -/// getMatchedOperand - If this is an input matching constraint, this method -/// returns the output operand it matches. +/// If this is an input matching constraint, this method returns the output +/// operand it matches. unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { assert(!ConstraintCode.empty() && "No known constraint!"); return atoi(ConstraintCode.c_str()); } - -/// ParseConstraints - Split up the constraint string from the inline -/// assembly value into the specific constraints and their prefixes, -/// and also tie in the associated operand values. +/// Split up the constraint string from the inline assembly value into the +/// specific constraints and their prefixes, and also tie in the associated +/// operand values. /// If this returns an empty vector, and if the constraint string itself /// isn't empty, there was an error parsing. TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, ImmutableCallSite CS) const { - /// ConstraintOperands - Information about all of the constraints. + /// Information about all of the constraints. AsmOperandInfoVector ConstraintOperands; const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); unsigned maCount = 0; // Largest number of multiple alternative constraints. @@ -2483,16 +2479,13 @@ TargetLowering::ParseConstraints(const DataLayout &DL, " incompatible type!"); } } - } } return ConstraintOperands; } - -/// getConstraintGenerality - Return an integer indicating how general CT -/// is. +/// Return an integer indicating how general CT is. 
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { case TargetLowering::C_Other: @@ -2581,8 +2574,8 @@ TargetLowering::ConstraintWeight return weight; } -/// ChooseConstraint - If there are multiple different constraints that we -/// could pick for this operand (e.g. "imr") try to pick the 'best' one. +/// If there are multiple different constraints that we could pick for this +/// operand (e.g. "imr") try to pick the 'best' one. /// This is somewhat tricky: constraints fall into four classes: /// Other -> immediates and magic values /// Register -> one specific register @@ -2649,9 +2642,8 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, OpInfo.ConstraintType = BestType; } -/// ComputeConstraintToUse - Determines the constraint code and constraint -/// type to use for the specific AsmOperandInfo, setting -/// OpInfo.ConstraintCode and OpInfo.ConstraintType. +/// Determines the constraint code and constraint type to use for the specific +/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType. void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG) const { @@ -2717,6 +2709,16 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, return Mul; } +SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, + std::vector<SDNode *> *Created) const { + AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isIntDivCheap(N->getValueType(0), Attr)) + return SDValue(N,0); // Lower SDIV as SDIV + return SDValue(); +} + /// \brief Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. @@ -3036,3 +3038,46 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT); return true; } + +//===----------------------------------------------------------------------===// +// Implementation of Emulated TLS Model +//===----------------------------------------------------------------------===// + +SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, + SelectionDAG &DAG) const { + // Access to address of TLS varialbe xyz is lowered to a function call: + // __emutls_get_address( address of global variable named "__emutls_v.xyz" ) + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext()); + SDLoc dl(GA); + + ArgListTy Args; + ArgListEntry Entry; + std::string NameString = ("__emutls_v." 
+ GA->getGlobal()->getName()).str(); + Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent()); + StringRef EmuTlsVarName(NameString); + GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName); + if (!EmuTlsVar) + EmuTlsVar = dyn_cast_or_null<GlobalVariable>( + VariableModule->getOrInsertGlobal(EmuTlsVarName, VoidPtrType)); + Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT); + Entry.Ty = VoidPtrType; + Args.push_back(Entry); + + SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()); + CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args), 0); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + + // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. + // At last for X86 targets, maybe good for other targets too? + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setAdjustsStack(true); // Is this only for X86 target? + MFI->setHasCalls(true); + + assert((GA->getOffset() == 0) && + "Emulated TLS must have zero offset in GlobalAddressSDNode"); + return CallResult.first; +} diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp index e7b2a8e72d2c..878eeeed0f6a 100644 --- a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -112,7 +112,7 @@ public: case 1: // Find all 'return', 'resume', and 'unwind' instructions. while (StateBB != StateE) { - BasicBlock *CurBB = StateBB++; + BasicBlock *CurBB = &*StateBB++; // Branches and invokes do not escape, only unwind, resume, and return // do. @@ -120,7 +120,7 @@ public: if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) continue; - Builder.SetInsertPoint(TI->getParent(), TI); + Builder.SetInsertPoint(TI); return &Builder; } @@ -163,8 +163,8 @@ public: // Split the basic block containing the function call. BasicBlock *CallBB = CI->getParent(); - BasicBlock *NewBB = - CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont"); + BasicBlock *NewBB = CallBB->splitBasicBlock( + CI->getIterator(), CallBB->getName() + ".cont"); // Remove the unconditional branch inserted at the end of CallBB. CallBB->getInstList().pop_back(); @@ -184,7 +184,7 @@ public: delete CI; } - Builder.SetInsertPoint(RI->getParent(), RI); + Builder.SetInsertPoint(RI); return &Builder; } } diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp index 4463cc7d3c51..f8aa1e2b0b9a 100644 --- a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -43,9 +43,11 @@ // points must be in the same loop. // Property #3 is ensured via the MachineBlockFrequencyInfo. // -// If this pass found points matching all this properties, then -// MachineFrameInfo is updated this that information. +// If this pass found points matching all these properties, then +// MachineFrameInfo is updated with this information. //===----------------------------------------------------------------------===// +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" // To check for profitability. #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -61,11 +63,14 @@ #include "llvm/CodeGen/Passes.h" // To know about callee-saved. 
#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/Debug.h" // To query the target about frame lowering. #include "llvm/Target/TargetFrameLowering.h" // To know about frame setup operation. #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" // To access TargetInstrInfo. #include "llvm/Target/TargetSubtargetInfo.h" @@ -78,6 +83,10 @@ STATISTIC(NumCandidates, "Number of shrink-wrapping candidates"); STATISTIC(NumCandidatesDropped, "Number of shrink-wrapping candidates dropped because of frequency"); +static cl::opt<cl::boolOrDefault> + EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, + cl::desc("enable the shrink-wrapping pass")); + namespace { /// \brief Class to determine where the safe point to insert the /// prologue and epilogue are. @@ -113,18 +122,38 @@ class ShrinkWrap : public MachineFunctionPass { unsigned FrameDestroyOpcode; /// Entry block. const MachineBasicBlock *Entry; + typedef SmallSetVector<unsigned, 16> SetOfRegs; + /// Registers that need to be saved for the current function. + mutable SetOfRegs CurrentCSRs; + /// Current MachineFunction. + MachineFunction *MachineFunc; /// \brief Check if \p MI uses or defines a callee-saved register or /// a frame index. If this is the case, this means \p MI must happen /// after Save and before Restore. - bool useOrDefCSROrFI(const MachineInstr &MI) const; + bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const; + + const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const { + if (CurrentCSRs.empty()) { + BitVector SavedRegs; + const TargetFrameLowering *TFI = + MachineFunc->getSubtarget().getFrameLowering(); + + TFI->determineCalleeSaves(*MachineFunc, SavedRegs, RS); + + for (int Reg = SavedRegs.find_first(); Reg != -1; + Reg = SavedRegs.find_next(Reg)) + CurrentCSRs.insert((unsigned)Reg); + } + return CurrentCSRs; + } /// \brief Update the Save and Restore points such that \p MBB is in /// the region that is dominated by Save and post-dominated by Restore /// and Save and Restore still match the safe point definition. /// Such point may not exist and Save and/or Restore may be null after /// this call. - void updateSaveRestorePoints(MachineBasicBlock &MBB); + void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS); /// \brief Initialize the pass for \p MF. void init(MachineFunction &MF) { @@ -140,6 +169,8 @@ class ShrinkWrap : public MachineFunctionPass { FrameSetupOpcode = TII.getCallFrameSetupOpcode(); FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); Entry = &MF.front(); + CurrentCSRs.clear(); + MachineFunc = &MF; ++NumFunc; } @@ -148,6 +179,9 @@ class ShrinkWrap : public MachineFunctionPass { /// shrink-wrapping. bool ArePointsInteresting() const { return Save != Entry && Save && Restore; } + /// \brief Check if shrink wrapping is enabled for this target and function. 
+ static bool isShrinkWrapEnabled(const MachineFunction &MF); + public: static char ID; @@ -185,27 +219,34 @@ INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false) -bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI) const { +bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, + RegScavenger *RS) const { if (MI.getOpcode() == FrameSetupOpcode || MI.getOpcode() == FrameDestroyOpcode) { DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); return true; } for (const MachineOperand &MO : MI.operands()) { - bool UseCSR = false; + bool UseOrDefCSR = false; if (MO.isReg()) { unsigned PhysReg = MO.getReg(); if (!PhysReg) continue; assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Unallocated register?!"); - UseCSR = RCI.getLastCalleeSavedAlias(PhysReg); + UseOrDefCSR = RCI.getLastCalleeSavedAlias(PhysReg); + } else if (MO.isRegMask()) { + // Check if this regmask clobbers any of the CSRs. + for (unsigned Reg : getCurrentCSRs(RS)) { + if (MO.clobbersPhysReg(Reg)) { + UseOrDefCSR = true; + break; + } + } } - // TODO: Handle regmask more accurately. - // For now, be conservative about them. - if (UseCSR || MO.isFI() || MO.isRegMask()) { - DEBUG(dbgs() << "Use or define CSR(" << UseCSR << ") or FI(" << MO.isFI() - << "): " << MI << '\n'); + if (UseOrDefCSR || MO.isFI()) { + DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI(" + << MO.isFI() << "): " << MI << '\n'); return true; } } @@ -225,7 +266,8 @@ MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, return IDom; } -void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { +void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, + RegScavenger *RS) { // Get rid of the easy cases first. if (!Save) Save = &MBB; @@ -246,7 +288,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { // terminator. if (Restore == &MBB) { for (const MachineInstr &Terminator : MBB.terminators()) { - if (!useOrDefCSROrFI(Terminator)) + if (!useOrDefCSROrFI(Terminator, RS)) continue; // One of the terminator needs to happen before the restore point. if (MBB.succ_empty()) { @@ -277,7 +319,24 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { while (Save && Restore && (!(SaveDominatesRestore = MDT->dominates(Save, Restore)) || !(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) || - MLI->getLoopFor(Save) != MLI->getLoopFor(Restore))) { + // Post-dominance is not enough in loops to ensure that all uses/defs + // are after the prologue and before the epilogue at runtime. + // E.g., + // while(1) { + // Save + // Restore + // if (...) + // break; + // use/def CSRs + // } + // All the uses/defs of CSRs are dominated by Save and post-dominated + // by Restore. However, the CSRs uses are still reachable after + // Restore and before Save are executed. + // + // For now, just push the restore/save points outside of loops. + // FIXME: Refine the criteria to still find interesting cases + // for loops. + MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) { // Fix (A). if (!SaveDominatesRestore) { Save = MDT->findNearestCommonDominator(Save, Restore); @@ -288,35 +347,72 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { Restore = MPDT->findNearestCommonDominator(Restore, Save); // Fix (C). 
- if (Save && Restore && Save != Restore && - MLI->getLoopFor(Save) != MLI->getLoopFor(Restore)) { - if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) - // Push Save outside of this loop. - Save = FindIDom<>(*Save, Save->predecessors(), *MDT); - else + if (Save && Restore && + (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) { + if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) { + // Push Save outside of this loop if immediate dominator is different + // from save block. If immediate dominator is not different, bail out. + MachineBasicBlock *IDom = FindIDom<>(*Save, Save->predecessors(), *MDT); + if (IDom != Save) + Save = IDom; + else { + Save = nullptr; + break; + } + } else { + // If the loop does not exit, there is no point in looking + // for a post-dominator outside the loop. + SmallVector<MachineBasicBlock*, 4> ExitBlocks; + MLI->getLoopFor(Restore)->getExitingBlocks(ExitBlocks); // Push Restore outside of this loop. - Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); + // Look for the immediate post-dominator of the loop exits. + MachineBasicBlock *IPdom = Restore; + for (MachineBasicBlock *LoopExitBB: ExitBlocks) { + IPdom = FindIDom<>(*IPdom, LoopExitBB->successors(), *MPDT); + if (!IPdom) + break; + } + // If the immediate post-dominator is not in a less nested loop, + // then we are stuck in a program with an infinite loop. + // In that case, we will not find a safe point, hence, bail out. + if (IPdom && MLI->getLoopDepth(IPdom) < MLI->getLoopDepth(Restore)) + Restore = IPdom; + else { + Restore = nullptr; + break; + } + } } } } bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { - if (MF.empty()) + if (MF.empty() || !isShrinkWrapEnabled(MF)) return false; + DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); init(MF); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + std::unique_ptr<RegScavenger> RS( + TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr); + for (MachineBasicBlock &MBB : MF) { DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName() << '\n'); + if (MBB.isEHFuncletEntry()) { + DEBUG(dbgs() << "EH Funclets are not supported yet.\n"); + return false; + } + for (const MachineInstr &MI : MBB) { - if (!useOrDefCSROrFI(MI)) + if (!useOrDefCSROrFI(MI, RS.get())) continue; // Save (resp. restore) point must dominate (resp. post dominate) // MI. Look for the proper basic block for those. - updateSaveRestorePoints(MBB); + updateSaveRestorePoints(MBB, RS.get()); // If we are at a point where we cannot improve the placement of // save/restore instructions, just give up. if (!ArePointsInteresting()) { @@ -368,7 +464,7 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { break; NewBB = Restore; } - updateSaveRestorePoints(*NewBB); + updateSaveRestorePoints(*NewBB, RS.get()); } while (Save && Restore); if (!ArePointsInteresting()) { @@ -386,3 +482,30 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { ++NumCandidates; return false; } + +bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) { + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + + switch (EnableShrinkWrapOpt) { + case cl::BOU_UNSET: + return TFI->enableShrinkWrapping(MF) && + // Windows with CFI has some limitations that make it impossible + // to use shrink-wrapping. + !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + // Sanitizers look at the value of the stack at the location + // of the crash. 
Since a crash can happen anywhere, the + // frame must be lowered before anything else happen for the + // sanitizers to be able to get a correct stack frame. + !(MF.getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || + MF.getFunction()->hasFnAttribute(Attribute::SanitizeThread) || + MF.getFunction()->hasFnAttribute(Attribute::SanitizeMemory)); + // If EnableShrinkWrap is set, it takes precedence on whatever the + // target sets. The rational is that we assume we want to test + // something related to shrink-wrapping. + case cl::BOU_TRUE: + return true; + case cl::BOU_FALSE: + return false; + } + llvm_unreachable("Invalid shrink-wrapping state"); +} diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp index d236e1f5ab6f..e1f242a08de1 100644 --- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -50,7 +50,7 @@ class SjLjEHPrepare : public FunctionPass { Type *FunctionContextTy; Constant *RegisterFn; Constant *UnregisterFn; - Constant *BuiltinSetjmpFn; + Constant *BuiltinSetupDispatchFn; Constant *FrameAddrFn; Constant *StackAddrFn; Constant *StackRestoreFn; @@ -112,7 +112,8 @@ bool SjLjEHPrepare::doInitialization(Module &M) { FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); - BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); + BuiltinSetupDispatchFn = + Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setup_dispatch); LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite); FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext); @@ -178,8 +179,8 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, // values and replace the LPI with that aggregate. Type *LPadType = LPI->getType(); Value *LPadVal = UndefValue::get(LPadType); - IRBuilder<> Builder( - std::next(BasicBlock::iterator(cast<Instruction>(SelVal)))); + auto *SelI = cast<Instruction>(SelVal); + IRBuilder<> Builder(SelI->getParent(), std::next(SelI->getIterator())); LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); @@ -190,7 +191,7 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, /// it with all of the data that we know at this point. Value *SjLjEHPrepare::setupFunctionContext(Function &F, ArrayRef<LandingPadInst *> LPads) { - BasicBlock *EntryBB = F.begin(); + BasicBlock *EntryBB = &F.front(); // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. This is an alloca @@ -198,12 +199,13 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, auto &DL = F.getParent()->getDataLayout(); unsigned Align = DL.getPrefTypeAlignment(FunctionContextTy); FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context", - EntryBB->begin()); + &EntryBB->front()); // Fill in the function context structure. for (unsigned I = 0, E = LPads.size(); I != E; ++I) { LandingPadInst *LPI = LPads[I]; - IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt()); + IRBuilder<> Builder(LPI->getParent(), + LPI->getParent()->getFirstInsertionPt()); // Reference the __data field. 
Value *FCData = @@ -250,21 +252,20 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { while (isa<AllocaInst>(AfterAllocaInsPt) && isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize())) ++AfterAllocaInsPt; + assert(AfterAllocaInsPt != F.front().end()); - for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; - ++AI) { - Type *Ty = AI->getType(); + for (auto &AI : F.args()) { + Type *Ty = AI.getType(); // Use 'select i8 true, %arg, undef' to simulate a 'no-op' instruction. Value *TrueValue = ConstantInt::getTrue(F.getContext()); Value *UndefValue = UndefValue::get(Ty); - Instruction *SI = SelectInst::Create(TrueValue, AI, UndefValue, - AI->getName() + ".tmp", - AfterAllocaInsPt); - AI->replaceAllUsesWith(SI); + Instruction *SI = SelectInst::Create( + TrueValue, &AI, UndefValue, AI.getName() + ".tmp", &*AfterAllocaInsPt); + AI.replaceAllUsesWith(SI); // Reset the operand, because it was clobbered by the RAUW above. - SI->setOperand(1, AI); + SI->setOperand(1, &AI); } } @@ -279,7 +280,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, // Ignore obvious cases we don't have to handle. In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. - Instruction *Inst = II; + Instruction *Inst = &*II; if (Inst->use_empty()) continue; if (Inst->hasOneUse() && @@ -360,7 +361,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, DemotePHIToStack(PN); // Move the landingpad instruction back to the top of the landing pad block. - LPI->moveBefore(UnwindBlock->begin()); + LPI->moveBefore(&UnwindBlock->front()); } } @@ -400,7 +401,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { Value *FuncCtx = setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); - BasicBlock *EntryBB = F.begin(); + BasicBlock *EntryBB = &F.front(); IRBuilder<> Builder(EntryBB->getTerminator()); // Get a reference to the jump buffer. @@ -421,9 +422,8 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { Val = Builder.CreateCall(StackAddrFn, {}, "sp"); Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true); - // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. - Value *SetjmpArg = Builder.CreateBitCast(JBufPtr, Builder.getInt8PtrTy()); - Builder.CreateCall(BuiltinSetjmpFn, SetjmpArg); + // Call the setup_dispatch instrinsic. It fills in the rest of the jmpbuf. + Builder.CreateCall(BuiltinSetupDispatchFn, {}); // Store a pointer to the function context so that the back-end will know // where to look for it. @@ -475,7 +475,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { continue; } Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); - StackAddr->insertAfter(I); + StackAddr->insertAfter(&*I); Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); StoreStackAddr->insertAfter(StackAddr); } diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp index 025ae70ed888..c9d23f67bdee 100644 --- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp @@ -172,8 +172,8 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, // optionally includes an additional position prior to MBB->begin(), indicated // by the includeStart flag. This is done so that we can iterate MIs in a MBB // in parallel with SlotIndexes, but there should be a better way to do this. 
- IndexList::iterator ListB = startIdx.listEntry(); - IndexList::iterator ListI = endIdx.listEntry(); + IndexList::iterator ListB = startIdx.listEntry()->getIterator(); + IndexList::iterator ListI = endIdx.listEntry()->getIterator(); MachineBasicBlock::iterator MBBI = End; bool pastStart = false; while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) { diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp index 97a5424aa560..d30cfc27bf4b 100644 --- a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp @@ -36,7 +36,6 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" #include "llvm/Support/ManagedStatic.h" using namespace llvm; @@ -188,9 +187,9 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { BlockFrequencies.resize(mf.getNumBlockIDs()); MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); setThreshold(MBFI->getEntryFreq()); - for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) { - unsigned Num = I->getNumber(); - BlockFrequencies[Num] = MBFI->getBlockFreq(I); + for (auto &I : mf) { + unsigned Num = I.getNumber(); + BlockFrequencies[Num] = MBFI->getBlockFreq(&I); } // We never change the function. diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp index dab1dfe4f1f8..51dddabed2d9 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp @@ -56,6 +56,7 @@ void SplitAnalysis::clear() { SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { const MachineBasicBlock *MBB = MF.getBlockNumbered(Num); + // FIXME: Handle multiple EH pad successors. const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor(); std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num]; SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB); @@ -176,10 +177,11 @@ bool SplitAnalysis::calcLiveBlockInfo() { UseE = UseSlots.end(); // Loop over basic blocks where CurLI is live. - MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start); + MachineFunction::iterator MFI = + LIS.getMBBFromIndex(LVI->start)->getIterator(); for (;;) { BlockInfo BI; - BI.MBB = MFI; + BI.MBB = &*MFI; SlotIndex Start, Stop; std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); @@ -259,7 +261,7 @@ bool SplitAnalysis::calcLiveBlockInfo() { if (LVI->start < Stop) ++MFI; else - MFI = LIS.getMBBFromIndex(LVI->start); + MFI = LIS.getMBBFromIndex(LVI->start)->getIterator(); } assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count"); @@ -275,8 +277,9 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { unsigned Count = 0; // Loop over basic blocks where li is live. - MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start); - SlotIndex Stop = LIS.getMBBEndIdx(MFI); + MachineFunction::const_iterator MFI = + LIS.getMBBFromIndex(LVI->start)->getIterator(); + SlotIndex Stop = LIS.getMBBEndIdx(&*MFI); for (;;) { ++Count; LVI = li->advanceTo(LVI, Stop); @@ -284,7 +287,7 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { return Count; do { ++MFI; - Stop = LIS.getMBBEndIdx(MFI); + Stop = LIS.getMBBEndIdx(&*MFI); } while (Stop <= LVI->start); } } @@ -864,9 +867,9 @@ bool SplitEditor::transferValues() { // This value has multiple defs in RegIdx, but it wasn't rematerialized, // so the live range is accurate. 
Add live-in blocks in [Start;End) to the // LiveInBlocks. - MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator(); SlotIndex BlockStart, BlockEnd; - std::tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB); + std::tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(&*MBB); // The first block may be live-in, or it may have its own def. if (Start != BlockStart) { @@ -875,7 +878,7 @@ bool SplitEditor::transferValues() { DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); // MBB has its own def. Is it also live-out? if (BlockEnd <= End) - LRC.setLiveOutValue(MBB, VNI); + LRC.setLiveOutValue(&*MBB, VNI); // Skip to the next block for live-in. ++MBB; @@ -886,23 +889,23 @@ bool SplitEditor::transferValues() { assert(Start <= BlockStart && "Expected live-in block"); while (BlockStart < End) { DEBUG(dbgs() << ">BB#" << MBB->getNumber()); - BlockEnd = LIS.getMBBEndIdx(MBB); + BlockEnd = LIS.getMBBEndIdx(&*MBB); if (BlockStart == ParentVNI->def) { // This block has the def of a parent PHI, so it isn't live-in. assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?"); VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped parent PHI"); if (End >= BlockEnd) - LRC.setLiveOutValue(MBB, VNI); // Live-out as well. + LRC.setLiveOutValue(&*MBB, VNI); // Live-out as well. } else { // This block needs a live-in value. The last block covered may not // be live-out. if (End < BlockEnd) - LRC.addLiveInBlock(LR, MDT[MBB], End); + LRC.addLiveInBlock(LR, MDT[&*MBB], End); else { // Live-through, and we don't know the value. - LRC.addLiveInBlock(LR, MDT[MBB]); - LRC.setLiveOutValue(MBB, nullptr); + LRC.addLiveInBlock(LR, MDT[&*MBB]); + LRC.setLiveOutValue(&*MBB, nullptr); } } BlockStart = BlockEnd; @@ -1081,16 +1084,14 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { ConnectedVNInfoEqClasses ConEQ(LIS); for (unsigned i = 0, e = Edit->size(); i != e; ++i) { // Don't use iterators, they are invalidated by create() below. - LiveInterval *li = &LIS.getInterval(Edit->get(i)); - unsigned NumComp = ConEQ.Classify(li); - if (NumComp <= 1) - continue; - DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n'); - SmallVector<LiveInterval*, 8> dups; - dups.push_back(li); - for (unsigned j = 1; j != NumComp; ++j) - dups.push_back(&Edit->createEmptyInterval()); - ConEQ.Distribute(&dups[0], MRI); + unsigned VReg = Edit->get(i); + LiveInterval &LI = LIS.getInterval(VReg); + SmallVector<LiveInterval*, 8> SplitLIs; + LIS.splitSeparateComponents(LI, SplitLIs); + unsigned Original = VRM.getOriginal(VReg); + for (LiveInterval *SplitLI : SplitLIs) + VRM.setIsSplitFromReg(SplitLI->reg, Original); + // The new intervals all map back to i. 
if (LRMap) LRMap->resize(Edit->size(), i); diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp index 116eef66c580..b3cd8b3d80bb 100644 --- a/contrib/llvm/lib/CodeGen/StackMaps.cpp +++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp @@ -94,7 +94,9 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, default: llvm_unreachable("Unrecognized operand type."); case StackMaps::DirectMemRefOp: { - unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits(); + auto &DL = AP.MF->getDataLayout(); + + unsigned Size = DL.getPointerSizeInBits(); assert((Size % 8) == 0 && "Need pointer size in bytes."); Size /= 8; unsigned Reg = (++MOI)->getReg(); diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp index bcea37a3aafa..db3fef524b30 100644 --- a/contrib/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -373,7 +373,7 @@ bool StackProtector::InsertStackProtectors() { Value *StackGuardVar = nullptr; // The stack guard variable. for (Function::iterator I = F->begin(), E = F->end(); I != E;) { - BasicBlock *BB = I++; + BasicBlock *BB = &*I++; ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); if (!RI) continue; @@ -433,7 +433,7 @@ bool StackProtector::InsertStackProtectors() { BasicBlock *FailBB = CreateFailBB(); // Split the basic block before the return instruction. - BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return"); + BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return"); // Update the dominator tree if we need to. if (DT && DT->isReachableFromEntry(BB)) { @@ -453,22 +453,20 @@ bool StackProtector::InsertStackProtectors() { LoadInst *LI1 = B.CreateLoad(StackGuardVar); LoadInst *LI2 = B.CreateLoad(AI); Value *Cmp = B.CreateICmpEQ(LI1, LI2); - unsigned SuccessWeight = - BranchProbabilityInfo::getBranchWeightStackProtector(true); - unsigned FailureWeight = - BranchProbabilityInfo::getBranchWeightStackProtector(false); + auto SuccessProb = + BranchProbabilityInfo::getBranchProbStackProtector(true); + auto FailureProb = + BranchProbabilityInfo::getBranchProbStackProtector(false); MDNode *Weights = MDBuilder(F->getContext()) - .createBranchWeights(SuccessWeight, FailureWeight); + .createBranchWeights(SuccessProb.getNumerator(), + FailureProb.getNumerator()); B.CreateCondBr(Cmp, NewBB, FailBB, Weights); } } // Return if we didn't modify any basic blocks. i.e., there are no return // statements in the function. 
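The StackProtector change above stops passing raw branch weights and instead queries BranchProbabilityInfo for success/failure probabilities, then feeds their numerators to createBranchWeights. That preserves the success:failure ratio because both probabilities are expressed over a common fixed denominator. A standalone sketch with an invented fixed-denominator probability type (not LLVM's BranchProbability):

#include <cstdint>
#include <cstdio>

// Illustrative probability with a shared fixed denominator.
struct Prob {
  static const uint32_t Denom = 1u << 20; // example denominator
  uint32_t Num;                           // probability = Num / Denom
  uint32_t getNumerator() const { return Num; }
};

int main() {
  Prob Success{Prob::Denom - 1}; // "almost always" take the SP_return path
  Prob Failure{1};               // the stack-smash handler is extremely unlikely
  // Using the numerators directly as branch weights keeps the same ratio.
  std::printf("weights: %u vs %u\n", Success.getNumerator(),
              Failure.getNumerator());
}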
- if (!HasPrologue) - return false; - - return true; + return HasPrologue; } /// CreateFailBB - Create a basic block to jump to when the stack protector diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp index a5a175f2c8f0..51f4d0e68172 100644 --- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -318,7 +318,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { if (NewFI == -1 || (NewFI == (int)SS)) continue; - const PseudoSourceValue *NewSV = PseudoSourceValue::getFixedStack(NewFI); + const PseudoSourceValue *NewSV = MF.getPSVManager().getFixedStack(NewFI); SmallVectorImpl<MachineMemOperand *> &RefMMOs = SSRefs[SS]; for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i) RefMMOs[i]->setValue(NewSV); diff --git a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp index 95dfd75018c1..3f60e18fafa9 100644 --- a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp +++ b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp @@ -34,9 +34,9 @@ public: UsesMetadata = false; CustomRoots = false; } - Optional<bool> isGCManagedPointer(const Value *V) const override { + Optional<bool> isGCManagedPointer(const Type *Ty) const override { // Method is only valid on pointer typed values. - PointerType *PT = cast<PointerType>(V->getType()); + const PointerType *PT = cast<PointerType>(Ty); // For the sake of this example GC, we arbitrarily pick addrspace(1) as our // GC managed heap. We know that a pointer into this heap needs to be // updated and that no other pointer does. Note that addrspace(1) is used diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp index 237460cd9051..d2fbf533a787 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -59,7 +59,7 @@ TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden); typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy; namespace { - /// TailDuplicatePass - Perform tail duplication. + /// Perform tail duplication. class TailDuplicatePass : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -69,11 +69,11 @@ namespace { std::unique_ptr<RegScavenger> RS; bool PreRegAlloc; - // SSAUpdateVRs - A list of virtual registers for which to update SSA form. + // A list of virtual registers for which to update SSA form. SmallVector<unsigned, 16> SSAUpdateVRs; - // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of - // source virtual registers. + // For each virtual register in SSAUpdateVals keep a list of source virtual + // registers. DenseMap<unsigned, AvailableValsTy> SSAUpdateVals; public: @@ -161,7 +161,7 @@ void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const { static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = I; + MachineBasicBlock *MBB = &*I; SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(), MBB->pred_end()); MachineBasicBlock::iterator MI = MBB->begin(); @@ -207,7 +207,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { } } -/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup. +/// Tail duplicate the block and cleanup. 
bool TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, bool IsSimple, @@ -310,9 +310,9 @@ TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, return true; } -/// TailDuplicateBlocks - Look for small blocks that are unconditionally -/// branched to and do not fall through. Tail-duplicate their instructions -/// into their predecessors to eliminate (dynamic) branches. +/// Look for small blocks that are unconditionally branched to and do not fall +/// through. Tail-duplicate their instructions into their predecessors to +/// eliminate (dynamic) branches. bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { bool MadeChange = false; @@ -322,7 +322,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { } for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { - MachineBasicBlock *MBB = I++; + MachineBasicBlock *MBB = &*I++; if (NumTails == TailDupLimit) break; @@ -375,8 +375,7 @@ static void getRegsUsedByPHIs(const MachineBasicBlock &BB, } } -/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for -/// SSA update. +/// Add a definition and source virtual registers pair for SSA update. void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, MachineBasicBlock *BB) { DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg); @@ -390,9 +389,8 @@ void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, } } -/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB. -/// Remember the source register that's contributed by PredBB and update SSA -/// update map. +/// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the +/// source register that's contributed by PredBB and update SSA update map. void TailDuplicatePass::ProcessPHI( MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, DenseMap<unsigned, unsigned> &LocalVRMap, @@ -422,7 +420,7 @@ void TailDuplicatePass::ProcessPHI( MI->eraseFromParent(); } -/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update +/// Duplicate a TailBB instruction to PredBB and update /// the source operands due to earlier PHI translation. void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, MachineBasicBlock *TailBB, @@ -459,9 +457,9 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, PredBB->insert(PredBB->instr_end(), NewMI); } -/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor -/// blocks, the successors have gained new predecessors. Update the PHI -/// instructions in them accordingly. +/// After FromBB is tail duplicated into its predecessor blocks, the successors +/// have gained new predecessors. Update the PHI instructions in them +/// accordingly. void TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, SmallVectorImpl<MachineBasicBlock *> &TDBBs, @@ -545,7 +543,7 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, } } -/// shouldTailDuplicate - Determine if it is profitable to duplicate this block. +/// Determine if it is profitable to duplicate this block. bool TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, bool IsSimple, @@ -563,6 +561,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // compensate for the duplication. unsigned MaxDuplicateCount; if (TailDuplicateSize.getNumOccurrences() == 0 && + // FIXME: Use Function::optForSize(). 
MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) MaxDuplicateCount = 1; else @@ -584,30 +583,51 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; - for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) { + for (MachineInstr &MI : TailBB) { // Non-duplicable things shouldn't be tail-duplicated. - if (I->isNotDuplicable()) + if (MI.isNotDuplicable()) return false; // Do not duplicate 'return' instructions if this is a pre-regalloc run. // A return may expand into a lot more instructions (e.g. reload of callee // saved registers) after PEI. - if (PreRegAlloc && I->isReturn()) + if (PreRegAlloc && MI.isReturn()) return false; // Avoid duplicating calls before register allocation. Calls presents a // barrier to register allocation so duplicating them may end up increasing // spills. - if (PreRegAlloc && I->isCall()) + if (PreRegAlloc && MI.isCall()) return false; - if (!I->isPHI() && !I->isDebugValue()) + if (!MI.isPHI() && !MI.isDebugValue()) InstrCount += 1; if (InstrCount > MaxDuplicateCount) return false; } + // Check if any of the successors of TailBB has a PHI node in which the + // value corresponding to TailBB uses a subregister. + // If a phi node uses a register paired with a subregister, the actual + // "value type" of the phi may differ from the type of the register without + // any subregisters. Due to a bug, tail duplication may add a new operand + // without a necessary subregister, producing an invalid code. This is + // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll. + // Disable tail duplication for this case for now, until the problem is + // fixed. + for (auto SB : TailBB.successors()) { + for (auto &I : *SB) { + if (!I.isPHI()) + break; + unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB); + assert(Idx != 0); + MachineOperand &PU = I.getOperand(Idx); + if (PU.getSubReg() != 0) + return false; + } + } + if (HasIndirectbr && PreRegAlloc) return true; @@ -620,7 +640,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, return canCompletelyDuplicateBB(TailBB); } -/// isSimpleBB - True if this BB has only one unconditional jump. +/// True if this BB has only one unconditional jump. 
bool TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { if (TailBB->succ_size() != 1) @@ -636,22 +656,16 @@ TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { static bool bothUsedInPHI(const MachineBasicBlock &A, SmallPtrSet<MachineBasicBlock*, 8> SuccsB) { - for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(), - SE = A.succ_end(); SI != SE; ++SI) { - MachineBasicBlock *BB = *SI; + for (MachineBasicBlock *BB : A.successors()) if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI()) return true; - } return false; } bool TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { - for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(), - PE = BB.pred_end(); PI != PE; ++PI) { - MachineBasicBlock *PredBB = *PI; - + for (MachineBasicBlock *PredBB : BB.predecessors()) { if (PredBB->succ_size() > 1) return false; @@ -680,7 +694,7 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; - if (PredBB->getLandingPadSuccessor()) + if (PredBB->hasEHPadSuccessor()) continue; if (bothUsedInPHI(*PredBB, Succs)) @@ -696,7 +710,7 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, << "From simple Succ: " << *TailBB); MachineBasicBlock *NewTarget = *TailBB->succ_begin(); - MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(PredBB)); + MachineBasicBlock *NextBB = &*std::next(PredBB->getIterator()); // Make PredFBB explicit. if (PredCond.empty()) @@ -731,19 +745,19 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, if (PredTBB) TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); - uint32_t Weight = MBPI->getEdgeWeight(PredBB, TailBB); - PredBB->removeSuccessor(TailBB); - unsigned NumSuccessors = PredBB->succ_size(); - assert(NumSuccessors <= 1); - if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget) - PredBB->addSuccessor(NewTarget, Weight); + if (!PredBB->isSuccessor(NewTarget)) + PredBB->replaceSuccessor(TailBB, NewTarget); + else { + PredBB->removeSuccessor(TailBB, true); + assert(PredBB->succ_size() <= 1); + } TDBBs.push_back(PredBB); } return Changed; } -/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each +/// If it is profitable, duplicate TailBB's contents in each /// of its predecessors. bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, @@ -798,13 +812,12 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, RS->enterBasicBlock(PredBB); if (!PredBB->empty()) RS->forward(std::prev(PredBB->end())); - for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(), - E = TailBB->livein_end(); I != E; ++I) { - if (!RS->isRegUsed(*I, false)) + for (const auto &LI : TailBB->liveins()) { + if (!RS->isRegUsed(LI.PhysReg, false)) // If a register is previously livein to the tail but it's not live // at the end of predecessor BB, then it should be added to its // livein list. 
- PredBB->addLiveIn(*I); + PredBB->addLiveIn(LI); } } @@ -845,7 +858,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, "TailDuplicate called on block with multiple successors!"); for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), E = TailBB->succ_end(); I != E; ++I) - PredBB->addSuccessor(*I, MBPI->getEdgeWeight(TailBB, I)); + PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I)); Changed = true; ++NumTailDups; @@ -854,7 +867,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. - MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(TailBB)); + MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator()); MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; SmallVector<MachineOperand, 4> PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by @@ -960,8 +973,8 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, return Changed; } -/// RemoveDeadBlock - Remove the specified dead machine basic block from the -/// function, updating the CFG. +/// Remove the specified dead machine basic block from the function, updating +/// the CFG. void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) { assert(MBB->pred_empty() && "MBB must be dead!"); DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index f3cccd82a5c5..679ade185e1c 100644 --- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -32,25 +33,22 @@ bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const { return Attr.getValueAsString() == "true"; } -/// getFrameIndexOffset - Returns the displacement from the frame register to -/// the stack frame of the specified index. This is the default implementation -/// which is overridden for some targets. -int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->getObjectOffset(FI) + MFI->getStackSize() - - getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); -} - +/// Returns the displacement from the frame register to the stack +/// frame of the specified index, along with the frame register used +/// (in output arg FrameReg). This is the default implementation which +/// is overridden for some targets. int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); // By default, assume all frame indices are referenced via whatever // getFrameRegister() says. The target can override this if it's doing // something different. 
FrameReg = RI->getFrameRegister(MF); - return getFrameIndexOffset(MF, FI); + + return MFI->getObjectOffset(FI) + MFI->getStackSize() - + getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); } bool TargetFrameLowering::needsFrameIndexResolution( @@ -84,3 +82,13 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(Reg); } } + +unsigned TargetFrameLowering::getStackAlignmentSkew( + const MachineFunction &MF) const { + // When HHVM function is called, the stack is skewed as the return address + // is removed from the stack before we enter the function. + if (LLVM_UNLIKELY(MF.getFunction()->getCallingConv() == CallingConv::HHVM)) + return MF.getTarget().getPointerSize(); + + return 0; +} diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp index 97ca0253d376..6eaf991ac700 100644 --- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -118,23 +118,24 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, MBB->addSuccessor(NewDest); } -// commuteInstruction - The default implementation of this method just exchanges -// the two operands returned by findCommutedOpIndices. -MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, - bool NewMI) const { +MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned Idx1, + unsigned Idx2) const { const MCInstrDesc &MCID = MI->getDesc(); bool HasDef = MCID.getNumDefs(); if (HasDef && !MI->getOperand(0).isReg()) // No idea how to commute this instruction. Target should implement its own. return nullptr; - unsigned Idx1, Idx2; - if (!findCommutedOpIndices(MI, Idx1, Idx2)) { - assert(MI->isCommutable() && "Precondition violation: MI must be commutable."); - return nullptr; - } + unsigned CommutableOpIdx1 = Idx1; (void)CommutableOpIdx1; + unsigned CommutableOpIdx2 = Idx2; (void)CommutableOpIdx2; + assert(findCommutedOpIndices(MI, CommutableOpIdx1, CommutableOpIdx2) && + CommutableOpIdx1 == Idx1 && CommutableOpIdx2 == Idx2 && + "TargetInstrInfo::CommuteInstructionImpl(): not commutable operands."); assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() && "This only knows how to commute register operands so far"); + unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0; unsigned Reg1 = MI->getOperand(Idx1).getReg(); unsigned Reg2 = MI->getOperand(Idx2).getReg(); @@ -184,9 +185,53 @@ MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, return MI; } -/// findCommutedOpIndices - If specified MI is commutable, return the two -/// operand indices that would swap value. Return true if the instruction -/// is not in a form which this routine understands. +MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const { + // If OpIdx1 or OpIdx2 is not specified, then this method is free to choose + // any commutable operand, which is done in findCommutedOpIndices() method + // called below. 
+ if ((OpIdx1 == CommuteAnyOperandIndex || OpIdx2 == CommuteAnyOperandIndex) && + !findCommutedOpIndices(MI, OpIdx1, OpIdx2)) { + assert(MI->isCommutable() && + "Precondition violation: MI must be commutable."); + return nullptr; + } + return commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); +} + +bool TargetInstrInfo::fixCommutedOpIndices(unsigned &ResultIdx1, + unsigned &ResultIdx2, + unsigned CommutableOpIdx1, + unsigned CommutableOpIdx2) { + if (ResultIdx1 == CommuteAnyOperandIndex && + ResultIdx2 == CommuteAnyOperandIndex) { + ResultIdx1 = CommutableOpIdx1; + ResultIdx2 = CommutableOpIdx2; + } else if (ResultIdx1 == CommuteAnyOperandIndex) { + if (ResultIdx2 == CommutableOpIdx1) + ResultIdx1 = CommutableOpIdx2; + else if (ResultIdx2 == CommutableOpIdx2) + ResultIdx1 = CommutableOpIdx1; + else + return false; + } else if (ResultIdx2 == CommuteAnyOperandIndex) { + if (ResultIdx1 == CommutableOpIdx1) + ResultIdx2 = CommutableOpIdx2; + else if (ResultIdx1 == CommutableOpIdx2) + ResultIdx2 = CommutableOpIdx1; + else + return false; + } else + // Check that the result operand indices match the given commutable + // operand indices. + return (ResultIdx1 == CommutableOpIdx1 && ResultIdx2 == CommutableOpIdx2) || + (ResultIdx1 == CommutableOpIdx2 && ResultIdx2 == CommutableOpIdx1); + + return true; +} + bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { @@ -196,10 +241,15 @@ bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI, const MCInstrDesc &MCID = MI->getDesc(); if (!MCID.isCommutable()) return false; + // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this // is not true, then the target must implement this. - SrcOpIdx1 = MCID.getNumDefs(); - SrcOpIdx2 = SrcOpIdx1 + 1; + unsigned CommutableOpIdx1 = MCID.getNumDefs(); + unsigned CommutableOpIdx2 = CommutableOpIdx1 + 1; + if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, + CommutableOpIdx1, CommutableOpIdx2)) + return false; + if (!MI->getOperand(SrcOpIdx1).isReg() || !MI->getOperand(SrcOpIdx2).isReg()) // No idea. 
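With the new interface above, a caller of commuteInstruction can either name both operand indices or pass a sentinel meaning "pick for me", and fixCommutedOpIndices reconciles the request with the pair of operands the instruction actually allows to be swapped. A standalone sketch of those resolution rules; AnyOperand here is an invented stand-in for TargetInstrInfo::CommuteAnyOperandIndex:

#include <cstdio>

// Invented sentinel standing in for TargetInstrInfo::CommuteAnyOperandIndex.
static const unsigned AnyOperand = ~0u;

// Resolve caller-requested indices (which may be the sentinel) against the
// pair the instruction can actually commute. Mirrors the intent of
// fixCommutedOpIndices() above.
static bool resolve(unsigned &Idx1, unsigned &Idx2, unsigned Commutable1,
                    unsigned Commutable2) {
  if (Idx1 == AnyOperand && Idx2 == AnyOperand) {
    Idx1 = Commutable1;
    Idx2 = Commutable2;
    return true;
  }
  if (Idx1 == AnyOperand) {
    if (Idx2 == Commutable1) { Idx1 = Commutable2; return true; }
    if (Idx2 == Commutable2) { Idx1 = Commutable1; return true; }
    return false;
  }
  if (Idx2 == AnyOperand) {
    if (Idx1 == Commutable1) { Idx2 = Commutable2; return true; }
    if (Idx1 == Commutable2) { Idx2 = Commutable1; return true; }
    return false;
  }
  // Both fixed: they must name the commutable pair, in either order.
  return (Idx1 == Commutable1 && Idx2 == Commutable2) ||
         (Idx1 == Commutable2 && Idx2 == Commutable1);
}

int main() {
  unsigned A = AnyOperand, B = 2; // "commute operand 2 with whatever pairs with it"
  if (resolve(A, B, /*Commutable1=*/1, /*Commutable2=*/2))
    std::printf("commute operands %u and %u\n", A, B); // prints 1 and 2
}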
@@ -207,7 +257,6 @@ bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI, return true; } - bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { if (!MI->isTerminator()) return false; @@ -315,7 +364,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, assert(RC->getSize() >= (Offset + Size) && "bad subregister range"); - if (!MF.getTarget().getDataLayout()->isLittleEndian()) { + if (!MF.getDataLayout().isLittleEndian()) { Offset = RC->getSize() - (Offset + Size); } return true; @@ -384,11 +433,6 @@ void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { llvm_unreachable("Not a MachO target"); } -bool TargetInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, - ArrayRef<unsigned> Ops) const { - return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]); -} - static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, int FrameIndex, const TargetInstrInfo &TII) { @@ -489,10 +533,9 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, "Folded a use to a non-load!"); const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - Flags, MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), Flags, MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); NewMI->addMemOperand(MF, MMO); return NewMI; @@ -517,6 +560,217 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, return --Pos; } +bool TargetInstrInfo::hasReassociableOperands( + const MachineInstr &Inst, const MachineBasicBlock *MBB) const { + const MachineOperand &Op1 = Inst.getOperand(1); + const MachineOperand &Op2 = Inst.getOperand(2); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + // We need virtual register definitions for the operands that we will + // reassociate. + MachineInstr *MI1 = nullptr; + MachineInstr *MI2 = nullptr; + if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg())) + MI1 = MRI.getUniqueVRegDef(Op1.getReg()); + if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg())) + MI2 = MRI.getUniqueVRegDef(Op2.getReg()); + + // And they need to be in the trace (otherwise, they won't have a depth). + return MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB; +} + +bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst, + bool &Commuted) const { + const MachineBasicBlock *MBB = Inst.getParent(); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg()); + MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); + unsigned AssocOpcode = Inst.getOpcode(); + + // If only one operand has the same opcode and it's the second source operand, + // the operands must be commuted. + Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode; + if (Commuted) + std::swap(MI1, MI2); + + // 1. The previous instruction must be the same type as Inst. + // 2. The previous instruction must have virtual register definitions for its + // operands in the same basic block as Inst. + // 3. The previous instruction's result must only be used by Inst. 
+ return MI1->getOpcode() == AssocOpcode && + hasReassociableOperands(*MI1, MBB) && + MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()); +} + +// 1. The operation must be associative and commutative. +// 2. The instruction must have virtual register definitions for its +// operands in the same basic block. +// 3. The instruction must have a reassociable sibling. +bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst, + bool &Commuted) const { + return isAssociativeAndCommutative(Inst) && + hasReassociableOperands(Inst, Inst.getParent()) && + hasReassociableSibling(Inst, Commuted); +} + +// The concept of the reassociation pass is that these operations can benefit +// from this kind of transformation: +// +// A = ? op ? +// B = A op X (Prev) +// C = B op Y (Root) +// --> +// A = ? op ? +// B = X op Y +// C = A op B +// +// breaking the dependency between A and B, allowing them to be executed in +// parallel (or back-to-back in a pipeline) instead of depending on each other. + +// FIXME: This has the potential to be expensive (compile time) while not +// improving the code at all. Some ways to limit the overhead: +// 1. Track successful transforms; bail out if hit rate gets too low. +// 2. Only enable at -O3 or some other non-default optimization level. +// 3. Pre-screen pattern candidates here: if an operand of the previous +// instruction is known to not increase the critical path, then don't match +// that pattern. +bool TargetInstrInfo::getMachineCombinerPatterns( + MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &Patterns) const { + + bool Commute; + if (isReassociationCandidate(Root, Commute)) { + // We found a sequence of instructions that may be suitable for a + // reassociation of operands to increase ILP. Specify each commutation + // possibility for the Prev instruction in the sequence and let the + // machine combiner decide if changing the operands is worthwhile. + if (Commute) { + Patterns.push_back(MachineCombinerPattern::REASSOC_AX_YB); + Patterns.push_back(MachineCombinerPattern::REASSOC_XA_YB); + } else { + Patterns.push_back(MachineCombinerPattern::REASSOC_AX_BY); + Patterns.push_back(MachineCombinerPattern::REASSOC_XA_BY); + } + return true; + } + + return false; +} + +/// Attempt the reassociation transformation to reduce critical path length. +/// See the above comments before getMachineCombinerPatterns(). +void TargetInstrInfo::reassociateOps( + MachineInstr &Root, MachineInstr &Prev, + MachineCombinerPattern Pattern, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { + MachineFunction *MF = Root.getParent()->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI); + + // This array encodes the operand index for each parameter because the + // operands may be commuted. Each row corresponds to a pattern value, + // and each column specifies the index of A, B, X, Y. 
+ unsigned OpIdx[4][4] = { + { 1, 1, 2, 2 }, + { 1, 2, 2, 1 }, + { 2, 1, 1, 2 }, + { 2, 2, 1, 1 } + }; + + int Row; + switch (Pattern) { + case MachineCombinerPattern::REASSOC_AX_BY: Row = 0; break; + case MachineCombinerPattern::REASSOC_AX_YB: Row = 1; break; + case MachineCombinerPattern::REASSOC_XA_BY: Row = 2; break; + case MachineCombinerPattern::REASSOC_XA_YB: Row = 3; break; + default: llvm_unreachable("unexpected MachineCombinerPattern"); + } + + MachineOperand &OpA = Prev.getOperand(OpIdx[Row][0]); + MachineOperand &OpB = Root.getOperand(OpIdx[Row][1]); + MachineOperand &OpX = Prev.getOperand(OpIdx[Row][2]); + MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]); + MachineOperand &OpC = Root.getOperand(0); + + unsigned RegA = OpA.getReg(); + unsigned RegB = OpB.getReg(); + unsigned RegX = OpX.getReg(); + unsigned RegY = OpY.getReg(); + unsigned RegC = OpC.getReg(); + + if (TargetRegisterInfo::isVirtualRegister(RegA)) + MRI.constrainRegClass(RegA, RC); + if (TargetRegisterInfo::isVirtualRegister(RegB)) + MRI.constrainRegClass(RegB, RC); + if (TargetRegisterInfo::isVirtualRegister(RegX)) + MRI.constrainRegClass(RegX, RC); + if (TargetRegisterInfo::isVirtualRegister(RegY)) + MRI.constrainRegClass(RegY, RC); + if (TargetRegisterInfo::isVirtualRegister(RegC)) + MRI.constrainRegClass(RegC, RC); + + // Create a new virtual register for the result of (X op Y) instead of + // recycling RegB because the MachineCombiner's computation of the critical + // path requires a new register definition rather than an existing one. + unsigned NewVR = MRI.createVirtualRegister(RC); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + + unsigned Opcode = Root.getOpcode(); + bool KillA = OpA.isKill(); + bool KillX = OpX.isKill(); + bool KillY = OpY.isKill(); + + // Create new instructions for insertion. + MachineInstrBuilder MIB1 = + BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) + .addReg(RegX, getKillRegState(KillX)) + .addReg(RegY, getKillRegState(KillY)); + MachineInstrBuilder MIB2 = + BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) + .addReg(RegA, getKillRegState(KillA)) + .addReg(NewVR, getKillRegState(true)); + + setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2); + + // Record new instructions for insertion and old instructions for deletion. + InsInstrs.push_back(MIB1); + InsInstrs.push_back(MIB2); + DelInstrs.push_back(&Prev); + DelInstrs.push_back(&Root); +} + +void TargetInstrInfo::genAlternativeCodeSequence( + MachineInstr &Root, MachineCombinerPattern Pattern, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const { + MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo(); + + // Select the previous instruction in the sequence based on the input pattern. 
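The OpIdx table above only selects which operands of Prev and Root play the roles of A, B, X and Y for each pattern; the rewrite itself turns C = (A op X) op Y into C = A op (X op Y), breaking the serial dependence between the two inner operations. A scalar sketch of the before/after shape, using integer addition as the associative op (function names invented):

#include <cstdio>

// Before: B depends on A, and C depends on B -- a three-deep chain.
static int chained(int A0, int A1, int X, int Y) {
  int A = A0 + A1; // A = ? op ?
  int B = A + X;   // B = A op X   (Prev)
  int C = B + Y;   // C = B op Y   (Root)
  return C;
}

// After: (X + Y) no longer depends on A, so it can execute in parallel with A.
static int reassociated(int A0, int A1, int X, int Y) {
  int A = A0 + A1; // A = ? op ?
  int B = X + Y;   // B = X op Y   (a new virtual register in the real pass)
  int C = A + B;   // C = A op B
  return C;
}

int main() {
  std::printf("%d == %d\n", chained(1, 2, 3, 4), reassociated(1, 2, 3, 4));
}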
+ MachineInstr *Prev = nullptr; + switch (Pattern) { + case MachineCombinerPattern::REASSOC_AX_BY: + case MachineCombinerPattern::REASSOC_XA_BY: + Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); + break; + case MachineCombinerPattern::REASSOC_AX_YB: + case MachineCombinerPattern::REASSOC_XA_YB: + Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); + break; + default: + break; + } + + assert(Prev && "Unknown pattern for machine combiner"); + + reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); + return; +} + /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific /// stack slot. @@ -661,6 +915,7 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const { return 0; int SPAdj = MI->getOperand(0).getImm(); + SPAdj = TFI->alignSPAdjust(SPAdj); if ((!StackGrowsDown && MI->getOpcode() == FrameSetupOpcode) || (StackGrowsDown && MI->getOpcode() == FrameDestroyOpcode)) @@ -686,10 +941,7 @@ bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI, // modification. const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI)) - return true; - - return false; + return MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI); } // Provide a global flag for disabling the PreRA hazard recognizer that targets diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp index ecfd65931574..36a31c9d6461 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -247,13 +247,9 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2"; Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; - Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi"; - Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi"; Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; - Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi"; - Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi"; Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; @@ -266,13 +262,9 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; - Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi"; - Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi"; Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; - Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi"; - Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi"; Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; @@ -501,10 +493,6 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { /// UNKNOWN_LIBCALL if there is none. 
RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f32) { - if (RetVT == MVT::i8) - return FPTOSINT_F32_I8; - if (RetVT == MVT::i16) - return FPTOSINT_F32_I16; if (RetVT == MVT::i32) return FPTOSINT_F32_I32; if (RetVT == MVT::i64) @@ -512,10 +500,6 @@ RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { if (RetVT == MVT::i128) return FPTOSINT_F32_I128; } else if (OpVT == MVT::f64) { - if (RetVT == MVT::i8) - return FPTOSINT_F64_I8; - if (RetVT == MVT::i16) - return FPTOSINT_F64_I16; if (RetVT == MVT::i32) return FPTOSINT_F64_I32; if (RetVT == MVT::i64) @@ -551,10 +535,6 @@ RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f32) { - if (RetVT == MVT::i8) - return FPTOUINT_F32_I8; - if (RetVT == MVT::i16) - return FPTOUINT_F32_I16; if (RetVT == MVT::i32) return FPTOUINT_F32_I32; if (RetVT == MVT::i64) @@ -562,10 +542,6 @@ RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { if (RetVT == MVT::i128) return FPTOUINT_F32_I128; } else if (OpVT == MVT::f64) { - if (RetVT == MVT::i8) - return FPTOUINT_F64_I8; - if (RetVT == MVT::i16) - return FPTOUINT_F64_I16; if (RetVT == MVT::i32) return FPTOUINT_F64_I32; if (RetVT == MVT::i64) @@ -758,17 +734,13 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { SelectIsExpensive = false; HasMultipleConditionRegisters = false; HasExtractBitsInsn = false; - IntDivIsCheap = false; FsqrtIsCheap = false; - Pow2SDivIsCheap = false; JumpIsExpensive = JumpIsExpensiveOverride; PredictableSelectIsExpensive = false; MaskAndBranchFoldingIsLegal = false; EnableExtLdPromotion = false; HasFloatingPointExceptions = true; StackPointerRegisterToSaveRestore = 0; - ExceptionPointerRegister = 0; - ExceptionSelectorRegister = 0; BooleanContents = UndefinedBooleanContent; BooleanFloatContents = UndefinedBooleanContent; BooleanVectorContents = UndefinedBooleanContent; @@ -778,6 +750,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { MinFunctionAlignment = 0; PrefFunctionAlignment = 0; PrefLoopAlignment = 0; + GatherAllAliasesMaxDepth = 6; MinStackArgumentAlignment = 1; InsertFencesForAtomic = false; MinimumJumpTableEntries = 4; @@ -814,6 +787,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::CONCAT_VECTORS, VT, Expand); setOperationAction(ISD::FMINNUM, VT, Expand); setOperationAction(ISD::FMAXNUM, VT, Expand); + setOperationAction(ISD::FMINNAN, VT, Expand); + setOperationAction(ISD::FMAXNAN, VT, Expand); setOperationAction(ISD::FMAD, VT, Expand); setOperationAction(ISD::SMIN, VT, Expand); setOperationAction(ISD::SMAX, VT, Expand); @@ -828,6 +803,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SMULO, VT, Expand); setOperationAction(ISD::UMULO, VT, Expand); + setOperationAction(ISD::BITREVERSE, VT, Expand); + // These library functions default to expand. setOperationAction(ISD::FROUND, VT, Expand); @@ -838,11 +815,17 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); } + + // For most targets @llvm.get.dynamic.area.offest just returns 0. + setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand); } // Most targets ignore the @llvm.prefetch intrinsic. setOperationAction(ISD::PREFETCH, MVT::Other, Expand); + // Most targets also ignore the @llvm.readcyclecounter intrinsic. 
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand); + // ConstantFP nodes default to expand. Targets can either change this to // Legal, in which case all fp constants are legal, or use isFPImmLegal() // to optimize expansions for certain constants. @@ -1111,6 +1094,19 @@ MachineBasicBlock* TargetLoweringBase::emitPatchPoint(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineFunction &MF = *MI->getParent()->getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + // We're handling multiple types of operands here: + // PATCHPOINT MetaArgs - live-in, read only, direct + // STATEPOINT Deopt Spill - live-through, read only, indirect + // STATEPOINT Deopt Alloca - live-through, read only, direct + // (We're currently conservative and mark the deopt slots read/write in + // practice.) + // STATEPOINT GC Spill - live-through, read/write, indirect + // STATEPOINT GC Alloca - live-through, read/write, direct + // The live-in vs live-through is handled already (the live through ones are + // all stack slots), but we need to handle the different type of stackmap + // operands and memory effects here. // MI changes inside this loop as we grow operands. for(unsigned OperIdx = 0; OperIdx != MI->getNumOperands(); ++OperIdx) { @@ -1126,10 +1122,24 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, // Copy operands before the frame-index. for (unsigned i = 0; i < OperIdx; ++i) MIB.addOperand(MI->getOperand(i)); - // Add frame index operands: direct-mem-ref tag, #FI, offset. - MIB.addImm(StackMaps::DirectMemRefOp); - MIB.addOperand(MI->getOperand(OperIdx)); - MIB.addImm(0); + // Add frame index operands recognized by stackmaps.cpp + if (MFI.isStatepointSpillSlotObjectIndex(FI)) { + // indirect-mem-ref tag, size, #FI, offset. + // Used for spills inserted by StatepointLowering. This codepath is not + // used for patchpoints/stackmaps at all, for these spilling is done via + // foldMemoryOperand callback only. + assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity"); + MIB.addImm(StackMaps::IndirectMemRefOp); + MIB.addImm(MFI.getObjectSize(FI)); + MIB.addOperand(MI->getOperand(OperIdx)); + MIB.addImm(0); + } else { + // direct-mem-ref tag, #FI, offset. + // Used by patchpoint, and direct alloca arguments to statepoints + MIB.addImm(StackMaps::DirectMemRefOp); + MIB.addOperand(MI->getOperand(OperIdx)); + MIB.addImm(0); + } // Copy the operands after the frame index. for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i) MIB.addOperand(MI->getOperand(i)); @@ -1139,7 +1149,6 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!"); // Add a new memory operand for this FI. - const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); unsigned Flags = MachineMemOperand::MOLoad; @@ -1148,8 +1157,8 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, Flags |= MachineMemOperand::MOVolatile; } MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI), Flags, - TM.getDataLayout()->getPointerSize(), MFI.getObjectAlignment(FI)); + MachinePointerInfo::getFixedStack(MF, FI), Flags, + MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI)); MIB->addMemOperand(MF, MMO); // Replace the instruction and update the operand index. @@ -1274,20 +1283,14 @@ void TargetLoweringBase::computeRegisterProperties( ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); } + // Decide how to handle f16. 
If the target does not have native f16 support, + // promote it to f32, because there are no f16 library calls (except for + // conversions). if (!isTypeLegal(MVT::f16)) { - // If the target has native f32 support, promote f16 operations to f32. If - // f32 is not supported, generate soft float library calls. - if (isTypeLegal(MVT::f32)) { - NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; - RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; - TransformToType[MVT::f16] = MVT::f32; - ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); - } else { - NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16]; - RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16]; - TransformToType[MVT::f16] = MVT::i16; - ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat); - } + NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; + RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; + TransformToType[MVT::f16] = MVT::f32; + ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); } // Loop over all of the vector value types to see which need transformations. @@ -1528,6 +1531,29 @@ unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty, return DL.getABITypeAlignment(Ty); } +bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, + const DataLayout &DL, EVT VT, + unsigned AddrSpace, + unsigned Alignment, + bool *Fast) const { + // Check if the specified alignment is sufficient based on the data layout. + // TODO: While using the data layout works in practice, a better solution + // would be to implement this check directly (make this a virtual function). + // For example, the ABI alignment may change based on software platform while + // this function should only be affected by hardware implementation. + Type *Ty = VT.getTypeForEVT(Context); + if (Alignment >= DL.getABITypeAlignment(Ty)) { + // Assume that an access that meets the ABI-specified alignment is fast. + if (Fast != nullptr) + *Fast = true; + return true; + } + + // This is a misaligned access. + return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast); +} + + //===----------------------------------------------------------------------===// // TargetTransformInfo Helpers //===----------------------------------------------------------------------===// @@ -1546,6 +1572,11 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { case Invoke: return 0; case Resume: return 0; case Unreachable: return 0; + case CleanupRet: return 0; + case CatchRet: return 0; + case CatchPad: return 0; + case CatchSwitch: return 0; + case CleanupPad: return 0; case Add: return ISD::ADD; case FAdd: return ISD::FADD; case Sub: return ISD::SUB; @@ -1603,13 +1634,13 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { llvm_unreachable("Unknown instruction type encountered!"); } -std::pair<unsigned, MVT> +std::pair<int, MVT> TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const { LLVMContext &C = Ty->getContext(); EVT MTy = getValueType(DL, Ty); - unsigned Cost = 1; + int Cost = 1; // We keep legalizing the type until we find a legal kind. We assume that // the only operation that costs anything is the split. After splitting // we need to handle two types. @@ -1622,11 +1653,28 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger) Cost *= 2; + // Do not loop with f128 type. 
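The new allowsMemoryAccess hook added above checks the requested alignment against the ABI type alignment from the data layout first, and only defers to the target's misaligned-access hook when that check fails. A stripped-down standalone sketch of the same two-step check, with plain integers standing in for EVT/DataLayout and an invented placeholder for the target hook:

#include <cstdio>

// Placeholder for the target-specific hook; a real target would answer this
// from its subtarget features. Here: "supported, but not fast".
static bool allowsMisalignedAccess(unsigned Align, bool *Fast) {
  if (Fast)
    *Fast = false;
  return Align > 0; // any nonzero alignment is tolerated in this sketch
}

// Generic wrapper: accesses that meet the ABI alignment are assumed legal and
// fast; everything else is delegated to the misaligned-access hook.
static bool allowsMemoryAccess(unsigned ABIAlign, unsigned Align, bool *Fast) {
  if (Align >= ABIAlign) {
    if (Fast)
      *Fast = true;
    return true;
  }
  return allowsMisalignedAccess(Align, Fast);
}

int main() {
  bool Fast = false;
  bool OK = allowsMemoryAccess(/*ABIAlign=*/8, /*Align=*/4, &Fast);
  std::printf("allowed=%d fast=%d\n", OK, Fast); // allowed=1 fast=0
}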
+ if (MTy == LK.second) + return std::make_pair(Cost, MTy.getSimpleVT()); + // Keep legalizing the type. MTy = LK.second; } } +Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const { + if (!TM.getTargetTriple().isAndroid()) + return nullptr; + + // Android provides a libc function to retrieve the address of the current + // thread's unsafe stack pointer. + Module *M = IRB.GetInsertBlock()->getParent()->getParent(); + Type *StackPtrTy = Type::getInt8PtrTy(M->getContext()); + Value *Fn = M->getOrInsertFunction("__safestack_pointer_address", + StackPtrTy->getPointerTo(0), nullptr); + return IRB.CreateCall(Fn); +} + //===----------------------------------------------------------------------===// // Loop Strength Reduction hooks //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 2f78763d8e02..58ae9cc53bda 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionCOFF.h" @@ -32,6 +33,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -58,9 +60,8 @@ MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( report_fatal_error("We do not support this DWARF encoding yet!"); } -void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, - const TargetMachine &TM, - const MCSymbol *Sym) const { +void TargetLoweringObjectFileELF::emitPersonalityValue( + MCStreamer &Streamer, const DataLayout &DL, const MCSymbol *Sym) const { SmallString<64> NameData("DW.ref."); NameData += Sym->getName(); MCSymbolELF *Label = @@ -72,9 +73,9 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP; MCSection *Sec = getContext().getELFSection(NameData, ELF::SHT_PROGBITS, Flags, 0, Label->getName()); - unsigned Size = TM.getDataLayout()->getPointerSize(); + unsigned Size = DL.getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment()); + Streamer.EmitValueToAlignment(DL.getPointerABIAlignment()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::create(Size, getContext()); Streamer.emitELFSize(Label, E); @@ -232,14 +233,8 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) { return ".tdata"; if (Kind.isThreadBSS()) return ".tbss"; - if (Kind.isDataNoRel()) + if (Kind.isData()) return ".data"; - if (Kind.isDataRelLocal()) - return ".data.rel.local"; - if (Kind.isDataRel()) - return ".data.rel"; - if (Kind.isReadOnlyWithRelLocal()) - return ".data.rel.ro.local"; assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); return ".data.rel.ro"; } @@ -282,8 +277,8 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV, // We also need alignment here. // FIXME: this is getting the alignment of the character, not the // alignment of the global! 
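The getSectionPrefixForGlobal hunk above folds the .data.rel* variants into plain .data, so the writable/relocatable tail of that mapping shrinks to four cases. A small standalone sketch of just that tail (the enum is an illustrative stand-in for SectionKind; the earlier text/bss/rodata cases are omitted):

#include <cassert>
#include <cstdio>

// Stand-in for the subset of SectionKind queries shown in the hunk above.
enum class Kind { ThreadData, ThreadBSS, Data, ReadOnlyWithRel };

static const char *sectionPrefixFor(Kind K) {
  switch (K) {
  case Kind::ThreadData:      return ".tdata";
  case Kind::ThreadBSS:       return ".tbss";
  case Kind::Data:            return ".data";        // no more .data.rel variants
  case Kind::ReadOnlyWithRel: return ".data.rel.ro";
  }
  assert(false && "unknown section kind");
  return nullptr;
}

int main() {
  std::printf("%s\n", sectionPrefixFor(Kind::Data)); // .data
}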
- unsigned Align = - TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)); + unsigned Align = GV->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GV)); std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; Name = SizeSpec + utostr(Align); @@ -350,9 +345,8 @@ bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection( /// Given a mergeable constant with the specified size and relocation /// information, return a section that it should be placed in. -MCSection * -TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind, - const Constant *C) const { +MCSection *TargetLoweringObjectFileELF::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C) const { if (Kind.isMergeableConst4() && MergeableConst4Section) return MergeableConst4Section; if (Kind.isMergeableConst8() && MergeableConst8Section) @@ -362,7 +356,6 @@ TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind, if (Kind.isReadOnly()) return ReadOnlySection; - if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); return DataRelROSection; } @@ -507,7 +500,7 @@ emitModuleFlags(MCStreamer &Streamer, // Get the section. MCSectionMachO *S = getContext().getMachOSection( - Segment, Section, TAA, StubSize, SectionKind::getDataNoRel()); + Segment, Section, TAA, StubSize, SectionKind::getData()); Streamer.SwitchSection(S); Streamer.EmitLabel(getContext(). getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); @@ -589,14 +582,16 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( // FIXME: Alignment check should be handled by section classifier. if (Kind.isMergeable1ByteCString() && - TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + GV->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GV)) < 32) return CStringSection; // Do not put 16-bit arrays in the UString section if they have an // externally visible label, this runs into issues with certain linker // versions. if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() && - TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + GV->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GV)) < 32) return UStringSection; // With MachO only variables whose corresponding symbol starts with 'l' or @@ -634,12 +629,11 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( return DataSection; } -MCSection * -TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind, - const Constant *C) const { +MCSection *TargetLoweringObjectFileMachO::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C) const { // If this constant requires a relocation, we have to put it in the data // segment, not in the text segment. 
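getSectionForConstant in the ELF lowering above buckets mergeable constants by entry size before falling back to the generic read-only or .data.rel.ro sections. A standalone sketch of that size-based bucketing; the function is illustrative only, and the .rodata.cstN names follow the usual ELF convention rather than quoting the LLVM section objects:

#include <cstdio>

// Pick an ELF section name for a constant pool entry, mirroring the
// "mergeable const 4/8/16, otherwise plain read-only" selection order.
static const char *sectionForConstant(unsigned SizeInBytes, bool NeedsReloc) {
  if (NeedsReloc)            // constants needing relocations end up in rel.ro
    return ".data.rel.ro";
  switch (SizeInBytes) {
  case 4:  return ".rodata.cst4";
  case 8:  return ".rodata.cst8";
  case 16: return ".rodata.cst16";
  default: return ".rodata"; // unusual sizes stay in plain rodata
  }
}

int main() {
  std::printf("%s\n", sectionForConstant(8, false)); // .rodata.cst8
}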
- if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) + if (Kind.isData() || Kind.isReadOnlyWithRel()) return ConstDataSection; if (Kind.isMergeableConst4()) @@ -706,7 +700,7 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol( const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( const MCSymbol *Sym, const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const { - // Although MachO 32-bit targets do not explictly have a GOTPCREL relocation + // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation // as 64-bit do, we replace the GOT equivalent by accessing the final symbol // through a non_lazy_ptr stub instead. One advantage is that it allows the // computation of deltas to final external symbols. Example: @@ -740,7 +734,7 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( // non_lazy_ptr stubs. SmallString<128> Name; StringRef Suffix = "$non_lazy_ptr"; - Name += DL->getPrivateGlobalPrefix(); + Name += MMI->getModule()->getDataLayout().getPrivateGlobalPrefix(); Name += Sym->getName(); Name += Suffix; MCSymbol *Stub = Ctx.getOrCreateSymbol(Name); @@ -763,6 +757,29 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( return MCBinaryExpr::createSub(LHS, RHS, Ctx); } +static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo, + const MCSection &Section) { + if (!AsmInfo.isSectionAtomizableBySymbols(Section)) + return true; + + // If it is not dead stripped, it is safe to use private labels. + const MCSectionMachO &SMO = cast<MCSectionMachO>(Section); + if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP)) + return true; + + return false; +} + +void TargetLoweringObjectFileMachO::getNameWithPrefix( + SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang, + const TargetMachine &TM) const { + SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); + const MCSection *TheSection = SectionForGlobal(GV, GVKind, Mang, TM); + bool CannotUsePrivateLabel = + !canUsePrivateLabel(*TM.getMCAsmInfo(), *TheSection); + Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); +} + //===----------------------------------------------------------------------===// // COFF //===----------------------------------------------------------------------===// @@ -918,7 +935,7 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( COMDATSymName, Selection); } else { SmallString<256> TmpData; - getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true, Mang, TM); + Mang.getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true); return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData, Selection); } @@ -943,8 +960,9 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( } void TargetLoweringObjectFileCOFF::getNameWithPrefix( - SmallVectorImpl<char> &OutName, const GlobalValue *GV, - bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const { + SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang, + const TargetMachine &TM) const { + bool CannotUsePrivateLabel = false; if (GV->hasPrivateLinkage() && ((isa<Function>(GV) && TM.getFunctionSections()) || (isa<GlobalVariable>(GV) && TM.getDataSections()))) @@ -1043,7 +1061,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal( raw_string_ostream FlagOS(Flag); Mang.getNameWithPrefix(FlagOS, GV, false); FlagOS.flush(); - if (Flag[0] == DL->getGlobalPrefix()) + if (Flag[0] == GV->getParent()->getDataLayout().getGlobalPrefix()) OS << 
Flag.substr(1); else OS << Flag; diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp index 61a66b623928..0a7042ac3db5 100644 --- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -11,13 +11,19 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#define DEBUG_TYPE "target-reg-info" using namespace llvm; @@ -34,54 +40,71 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, TargetRegisterInfo::~TargetRegisterInfo() {} -void PrintReg::print(raw_ostream &OS) const { - if (!Reg) - OS << "%noreg"; - else if (TargetRegisterInfo::isStackSlot(Reg)) - OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); - else if (TargetRegisterInfo::isVirtualRegister(Reg)) - OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg); - else if (TRI && Reg < TRI->getNumRegs()) - OS << '%' << TRI->getName(Reg); - else - OS << "%physreg" << Reg; - if (SubIdx) { - if (TRI) - OS << ':' << TRI->getSubRegIndexName(SubIdx); +namespace llvm { + +Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI, + unsigned SubIdx) { + return Printable([Reg, TRI, SubIdx](raw_ostream &OS) { + if (!Reg) + OS << "%noreg"; + else if (TargetRegisterInfo::isStackSlot(Reg)) + OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); + else if (TargetRegisterInfo::isVirtualRegister(Reg)) + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg); + else if (TRI && Reg < TRI->getNumRegs()) + OS << '%' << TRI->getName(Reg); else - OS << ":sub(" << SubIdx << ')'; - } + OS << "%physreg" << Reg; + if (SubIdx) { + if (TRI) + OS << ':' << TRI->getSubRegIndexName(SubIdx); + else + OS << ":sub(" << SubIdx << ')'; + } + }); } -void PrintRegUnit::print(raw_ostream &OS) const { - // Generic printout when TRI is missing. - if (!TRI) { - OS << "Unit~" << Unit; - return; - } +Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { + return Printable([Unit, TRI](raw_ostream &OS) { + // Generic printout when TRI is missing. + if (!TRI) { + OS << "Unit~" << Unit; + return; + } - // Check for invalid register units. - if (Unit >= TRI->getNumRegUnits()) { - OS << "BadUnit~" << Unit; - return; - } + // Check for invalid register units. + if (Unit >= TRI->getNumRegUnits()) { + OS << "BadUnit~" << Unit; + return; + } - // Normal units have at least one root. - MCRegUnitRootIterator Roots(Unit, TRI); - assert(Roots.isValid() && "Unit has no roots."); - OS << TRI->getName(*Roots); - for (++Roots; Roots.isValid(); ++Roots) - OS << '~' << TRI->getName(*Roots); + // Normal units have at least one root. 
+ MCRegUnitRootIterator Roots(Unit, TRI); + assert(Roots.isValid() && "Unit has no roots."); + OS << TRI->getName(*Roots); + for (++Roots; Roots.isValid(); ++Roots) + OS << '~' << TRI->getName(*Roots); + }); } -void PrintVRegOrUnit::print(raw_ostream &OS) const { - if (TRI && TRI->isVirtualRegister(Unit)) { - OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); - return; - } - PrintRegUnit::print(OS); +Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { + return Printable([Unit, TRI](raw_ostream &OS) { + if (TRI && TRI->isVirtualRegister(Unit)) { + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); + } else { + OS << PrintRegUnit(Unit, TRI); + } + }); +} + +Printable PrintLaneMask(LaneBitmask LaneMask) { + return Printable([LaneMask](raw_ostream &OS) { + OS << format("%08X", LaneMask); + }); } +} // End of llvm namespace + /// getAllocatableClass - Return the maximal subclass of the given register /// class that is alloctable, or NULL. const TargetRegisterClass * @@ -161,16 +184,24 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, static inline const TargetRegisterClass *firstCommonClass(const uint32_t *A, const uint32_t *B, - const TargetRegisterInfo *TRI) { + const TargetRegisterInfo *TRI, + const MVT::SimpleValueType SVT = + MVT::SimpleValueType::Any) { + const MVT VT(SVT); for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32) - if (unsigned Common = *A++ & *B++) - return TRI->getRegClass(I + countTrailingZeros(Common)); + if (unsigned Common = *A++ & *B++) { + const TargetRegisterClass *RC = + TRI->getRegClass(I + countTrailingZeros(Common)); + if (SVT == MVT::SimpleValueType::Any || RC->hasType(VT)) + return RC; + } return nullptr; } const TargetRegisterClass * TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B) const { + const TargetRegisterClass *B, + const MVT::SimpleValueType SVT) const { // First take care of the trivial cases. if (A == B) return A; @@ -179,7 +210,7 @@ TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, // Register classes are ordered topologically, so the largest common // sub-class it the common sub-class with the smallest ID. - return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this); + return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this, SVT); } const TargetRegisterClass * @@ -260,13 +291,55 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, return BestRC; } +/// \brief Check if the registers defined by the pair (RegisterClass, SubReg) +/// share the same register file. +static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, + const TargetRegisterClass *DefRC, + unsigned DefSubReg, + const TargetRegisterClass *SrcRC, + unsigned SrcSubReg) { + // Same register class. + if (DefRC == SrcRC) + return true; + + // Both operands are sub registers. Check if they share a register class. + unsigned SrcIdx, DefIdx; + if (SrcSubReg && DefSubReg) { + return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, + SrcIdx, DefIdx) != nullptr; + } + + // At most one of the register is a sub register, make it Src to avoid + // duplicating the test. + if (!SrcSubReg) { + std::swap(DefSubReg, SrcSubReg); + std::swap(DefRC, SrcRC); + } + + // One of the register is a sub register, check if we can get a superclass. + if (SrcSubReg) + return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr; + + // Plain copy. 
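The PrintReg/PrintRegUnit rewrite above turns small printer classes into functions that return a Printable wrapping a lambda, so call sites can keep writing OS << PrintReg(Reg, TRI). A self-contained sketch of the same idiom built on the standard library (this models the pattern, not the LLVM Printable class itself):

#include <functional>
#include <iostream>

// Minimal stand-in for a lazy printer: stores a callback and lets
// operator<< invoke it only when the value is actually streamed.
class Printable {
  std::function<void(std::ostream &)> Print;
public:
  explicit Printable(std::function<void(std::ostream &)> P)
      : Print(std::move(P)) {}
  friend std::ostream &operator<<(std::ostream &OS, const Printable &P) {
    P.Print(OS);
    return OS;
  }
};

// Factory in the style of PrintReg: captures its arguments in the lambda.
static Printable printReg(unsigned Reg) {
  return Printable([Reg](std::ostream &OS) {
    if (!Reg)
      OS << "%noreg";
    else
      OS << "%reg" << Reg;
  });
}

int main() {
  std::cout << printReg(0) << ' ' << printReg(42) << '\n'; // %noreg %reg42
}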
+ return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; +} + +bool TargetRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC, + unsigned DefSubReg, + const TargetRegisterClass *SrcRC, + unsigned SrcSubReg) const { + // If this source does not incur a cross register bank copy, use it. + return shareSameRegisterFile(*this, DefRC, DefSubReg, SrcRC, SrcSubReg); +} + // Compute target-independent register allocator hints to help eliminate copies. void TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, ArrayRef<MCPhysReg> Order, SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF, - const VirtRegMap *VRM) const { + const VirtRegMap *VRM, + const LiveRegMatrix *Matrix) const { const MachineRegisterInfo &MRI = MF.getRegInfo(); std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg); @@ -295,6 +368,26 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, Hints.push_back(Phys); } +bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const { + return !MF.getFunction()->hasFnAttribute("no-realign-stack"); +} + +bool TargetRegisterInfo::needsStackRealignment( + const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const Function *F = MF.getFunction(); + unsigned StackAlign = TFI->getStackAlignment(); + bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || + F->hasFnAttribute(Attribute::StackAlignment)); + if (MF.getFunction()->hasFnAttribute("stackrealign") || requiresRealignment) { + if (canRealignStack(MF)) + return true; + DEBUG(dbgs() << "Can't realign function's stack: " << F->getName() << "\n"); + } + return false; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex, diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp index 299380d9268b..fc656396ade8 100644 --- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp @@ -211,11 +211,9 @@ unsigned TargetSchedModel::computeOperandLatency( if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef() && SchedModel.isComplete()) { - std::string Err; - raw_string_ostream ss(Err); - ss << "DefIdx " << DefIdx << " exceeds machine model writes for " - << *DefMI; - report_fatal_error(ss.str()); + errs() << "DefIdx " << DefIdx << " exceeds machine model writes for " + << *DefMI; + llvm_unreachable("incomplete machine model"); } #endif // FIXME: Automatically giving all implicit defs defaultDefLatency is diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 1e30821dc741..c6bae2434586 100644 --- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -83,21 +83,20 @@ class TwoAddressInstructionPass : public MachineFunctionPass { // The current basic block being processed. MachineBasicBlock *MBB; - // DistanceMap - Keep track the distance of a MI from the start of the - // current basic block. + // Keep track the distance of a MI from the start of the current basic block. DenseMap<MachineInstr*, unsigned> DistanceMap; // Set of already processed instructions in the current block. 
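needsStackRealignment above combines three inputs: the largest alignment demanded by a frame object, the ABI stack alignment, and the per-function attributes, and it only honours a realignment request when realignment is actually possible for the function. A standalone sketch of that decision, with plain fields standing in for MachineFrameInfo and the attribute queries:

#include <cstdio>

struct FrameInfo {
  unsigned MaxObjectAlign;   // largest alignment requested by a stack object
  unsigned TargetStackAlign; // natural stack alignment of the ABI
  bool HasStackAlignAttr;    // explicit alignstack-style request on the function
  bool ForceRealign;         // "stackrealign"-style attribute
  bool CanRealign;           // false when e.g. no frame register is available
};

static bool needsStackRealignment(const FrameInfo &FI) {
  bool Requires =
      FI.MaxObjectAlign > FI.TargetStackAlign || FI.HasStackAlignAttr;
  if (FI.ForceRealign || Requires)
    return FI.CanRealign; // the request is dropped if realignment is impossible
  return false;
}

int main() {
  FrameInfo FI{/*MaxObjectAlign=*/32, /*TargetStackAlign=*/16,
               /*HasStackAlignAttr=*/false, /*ForceRealign=*/false,
               /*CanRealign=*/true};
  std::printf("%d\n", needsStackRealignment(FI)); // 1
}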
SmallPtrSet<MachineInstr*, 8> Processed; - // SrcRegMap - A map from virtual registers to physical registers which are - // likely targets to be coalesced to due to copies from physical registers to - // virtual registers. e.g. v1024 = move r0. + // A map from virtual registers to physical registers which are likely targets + // to be coalesced to due to copies from physical registers to virtual + // registers. e.g. v1024 = move r0. DenseMap<unsigned, unsigned> SrcRegMap; - // DstRegMap - A map from virtual registers to physical registers which are - // likely targets to be coalesced to due to copies to physical registers from - // virtual registers. e.g. r1 = move v1024. + // A map from virtual registers to physical registers which are likely targets + // to be coalesced to due to copies to physical registers from virtual + // registers. e.g. r1 = move v1024. DenseMap<unsigned, unsigned> DstRegMap; bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg, @@ -110,8 +109,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, MachineInstr *MI, unsigned Dist); - bool commuteInstruction(MachineBasicBlock::iterator &mi, - unsigned RegB, unsigned RegC, unsigned Dist); + bool commuteInstruction(MachineInstr *MI, + unsigned RegBIdx, unsigned RegCIdx, unsigned Dist); bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB); @@ -133,6 +132,11 @@ class TwoAddressInstructionPass : public MachineFunctionPass { unsigned SrcIdx, unsigned DstIdx, unsigned Dist, bool shouldOnlyCommute); + bool tryInstructionCommute(MachineInstr *MI, + unsigned DstOpIdx, + unsigned BaseOpIdx, + bool BaseOpKilled, + unsigned Dist); void scanUses(unsigned DstReg); void processCopy(MachineInstr *MI); @@ -151,7 +155,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired<AliasAnalysis>(); + AU.addRequired<AAResultsWrapperPass>(); AU.addPreserved<LiveVariables>(); AU.addPreserved<SlotIndexes>(); AU.addPreserved<LiveIntervals>(); @@ -160,7 +164,7 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } - /// runOnMachineFunction - Pass entry point. + /// Pass entry point. bool runOnMachineFunction(MachineFunction&) override; }; } // end anonymous namespace @@ -168,7 +172,7 @@ public: char TwoAddressInstructionPass::ID = 0; INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction", "Two-Address instruction pass", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction", "Two-Address instruction pass", false, false) @@ -176,10 +180,9 @@ char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS); -/// sink3AddrInstruction - A two-address instruction has been converted to a -/// three-address instruction to avoid clobbering a register. Try to sink it -/// past the instruction that would kill the above mentioned register to reduce -/// register pressure. +/// A two-address instruction has been converted to a three-address instruction +/// to avoid clobbering a register. Try to sink it past the instruction that +/// would kill the above mentioned register to reduce register pressure. 
bool TwoAddressInstructionPass:: sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, MachineBasicBlock::iterator OldPos) { @@ -195,8 +198,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, unsigned DefReg = 0; SmallSet<unsigned, 4> UseRegs; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); @@ -231,10 +233,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, KillMI = LIS->getInstructionFromIndex(I->end); } if (!KillMI) { - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(SavedReg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineOperand &UseMO = *UI; + for (MachineOperand &UseMO : MRI->use_nodbg_operands(SavedReg)) { if (!UseMO.isKill()) continue; KillMI = UseMO.getParent(); @@ -312,8 +311,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, return true; } -/// getSingleDef -- return the MachineInstr* if it is the single def of the Reg -/// in current BB. +/// Return the MachineInstr* if it is the single def of the Reg in current BB. static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB, const MachineRegisterInfo *MRI) { MachineInstr *Ret = nullptr; @@ -351,10 +349,10 @@ bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg, return false; } -/// noUseAfterLastDef - Return true if there are no intervening uses between the -/// last instruction in the MBB that defines the specified register and the -/// two-address instruction which is being processed. It also returns the last -/// def location by reference +/// Return true if there are no intervening uses between the last instruction +/// in the MBB that defines the specified register and the two-address +/// instruction which is being processed. It also returns the last def location +/// by reference. bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef) { LastDef = 0; @@ -375,9 +373,9 @@ bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist, return !(LastUse > LastDef && LastUse < Dist); } -/// isCopyToReg - Return true if the specified MI is a copy instruction or -/// a extract_subreg instruction. It also returns the source and destination -/// registers and whether they are physical registers by reference. +/// Return true if the specified MI is a copy instruction or an extract_subreg +/// instruction. It also returns the source and destination registers and +/// whether they are physical registers by reference. static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, unsigned &SrcReg, unsigned &DstReg, bool &IsSrcPhys, bool &IsDstPhys) { @@ -397,8 +395,8 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, return true; } -/// isPLainlyKilled - Test if the given register value, which is used by the -// given instruction, is killed by the given instruction. +/// Test if the given register value, which is used by the +/// given instruction, is killed by the given instruction. 
static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS) { if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) && @@ -424,7 +422,7 @@ static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, return MI->killsRegister(Reg); } -/// isKilled - Test if the given register value, which is used by the given +/// Test if the given register value, which is used by the given /// instruction, is killed by the given instruction. This looks through /// coalescable copies to see if the original value is potentially not killed. /// @@ -472,8 +470,8 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, } } -/// isTwoAddrUse - Return true if the specified MI uses the specified register -/// as a two-address use. If so, return the destination register by reference. +/// Return true if the specified MI uses the specified register as a two-address +/// use. If so, return the destination register by reference. static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { for (unsigned i = 0, NumOps = MI.getNumOperands(); i != NumOps; ++i) { const MachineOperand &MO = MI.getOperand(i); @@ -488,8 +486,8 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { return false; } -/// findOnlyInterestingUse - Given a register, if has a single in-basic block -/// use, return the use instruction if it's a copy or a two-address use. +/// Given a register, if has a single in-basic block use, return the use +/// instruction if it's a copy or a two-address use. static MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, MachineRegisterInfo *MRI, @@ -516,8 +514,8 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, return nullptr; } -/// getMappedReg - Return the physical register the specified virtual register -/// might be mapped to. +/// Return the physical register the specified virtual register might be mapped +/// to. static unsigned getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) { while (TargetRegisterInfo::isVirtualRegister(Reg)) { @@ -531,8 +529,7 @@ getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) { return 0; } -/// regsAreCompatible - Return true if the two registers are equal or aliased. -/// +/// Return true if the two registers are equal or aliased. static bool regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { if (RegA == RegB) @@ -543,8 +540,8 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { } -/// isProfitableToCommute - Return true if it's potentially profitable to commute -/// the two-address instruction that's being processed. +/// Return true if it's potentially profitable to commute the two-address +/// instruction that's being processed. bool TwoAddressInstructionPass:: isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, @@ -642,15 +639,15 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, return LastDefB && LastDefC && LastDefC > LastDefB; } -/// commuteInstruction - Commute a two-address instruction and update the basic -/// block, distance map, and live variables if needed. Return true if it is -/// successful. -bool TwoAddressInstructionPass:: -commuteInstruction(MachineBasicBlock::iterator &mi, - unsigned RegB, unsigned RegC, unsigned Dist) { - MachineInstr *MI = mi; +/// Commute a two-address instruction and update the basic block, distance map, +/// and live variables if needed. Return true if it is successful. 
+bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, + unsigned RegBIdx, + unsigned RegCIdx, + unsigned Dist) { + unsigned RegC = MI->getOperand(RegCIdx).getReg(); DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); - MachineInstr *NewMI = TII->commuteInstruction(MI); + MachineInstr *NewMI = TII->commuteInstruction(MI, false, RegBIdx, RegCIdx); if (NewMI == nullptr) { DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n"); @@ -672,8 +669,8 @@ commuteInstruction(MachineBasicBlock::iterator &mi, return true; } -/// isProfitableToConv3Addr - Return true if it is profitable to convert the -/// given 2-address instruction to a 3-address one. +/// Return true if it is profitable to convert the given 2-address instruction +/// to a 3-address one. bool TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){ // Look for situations like this: @@ -689,17 +686,18 @@ TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){ return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI)); } -/// convertInstTo3Addr - Convert the specified two-address instruction into a -/// three address one. Return true if this transformation was successful. +/// Convert the specified two-address instruction into a three address one. +/// Return true if this transformation was successful. bool TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned RegA, unsigned RegB, unsigned Dist) { // FIXME: Why does convertToThreeAddress() need an iterator reference? - MachineFunction::iterator MFI = MBB; + MachineFunction::iterator MFI = MBB->getIterator(); MachineInstr *NewMI = TII->convertToThreeAddress(MFI, mi, LV); - assert(MBB == MFI && "convertToThreeAddress changed iterator reference"); + assert(MBB->getIterator() == MFI && + "convertToThreeAddress changed iterator reference"); if (!NewMI) return false; @@ -730,8 +728,8 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, return true; } -/// scanUses - Scan forward recursively for only uses, update maps if the use -/// is a copy or a two-address instruction. +/// Scan forward recursively for only uses, update maps if the use is a copy or +/// a two-address instruction. void TwoAddressInstructionPass::scanUses(unsigned DstReg) { SmallVector<unsigned, 4> VirtRegPairs; @@ -777,8 +775,8 @@ TwoAddressInstructionPass::scanUses(unsigned DstReg) { } } -/// processCopy - If the specified instruction is not yet processed, process it -/// if it's a copy. For a copy instruction, we find the physical registers the +/// If the specified instruction is not yet processed, process it if it's a +/// copy. For a copy instruction, we find the physical registers the /// source and destination registers might be mapped to. These are kept in /// point-to maps used to determine future optimizations. e.g. /// v1024 = mov r0 @@ -813,9 +811,9 @@ void TwoAddressInstructionPass::processCopy(MachineInstr *MI) { return; } -/// rescheduleMIBelowKill - If there is one more local instruction that reads -/// 'Reg' and it kills 'Reg, consider moving the instruction below the kill -/// instruction in order to eliminate the need for the copy. +/// If there is one more local instruction that reads 'Reg' and it kills 'Reg, +/// consider moving the instruction below the kill instruction in order to +/// eliminate the need for the copy. 
bool TwoAddressInstructionPass:: rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, @@ -871,8 +869,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, SmallSet<unsigned, 2> Uses; SmallSet<unsigned, 2> Kills; SmallSet<unsigned, 2> Defs; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); @@ -914,8 +911,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, OtherMI->isBranch() || OtherMI->isTerminator()) // Don't move pass calls, etc. return false; - for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = OtherMI->getOperand(i); + for (const MachineOperand &MO : OtherMI->operands()) { if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); @@ -984,8 +980,8 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, return true; } -/// isDefTooClose - Return true if the re-scheduling will put the given -/// instruction too close to the defs of its register dependencies. +/// Return true if the re-scheduling will put the given instruction too close +/// to the defs of its register dependencies. bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI) { for (MachineInstr &DefMI : MRI->def_instructions(Reg)) { @@ -1004,10 +1000,9 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, return false; } -/// rescheduleKillAboveMI - If there is one more local instruction that reads -/// 'Reg' and it kills 'Reg, consider moving the kill instruction above the -/// current two-address instruction in order to eliminate the need for the -/// copy. +/// If there is one more local instruction that reads 'Reg' and it kills 'Reg, +/// consider moving the kill instruction above the current two-address +/// instruction in order to eliminate the need for the copy. bool TwoAddressInstructionPass:: rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, @@ -1055,8 +1050,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, SmallSet<unsigned, 2> Kills; SmallSet<unsigned, 2> Defs; SmallSet<unsigned, 2> LiveDefs; - for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = KillMI->getOperand(i); + for (const MachineOperand &MO : KillMI->operands()) { if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); @@ -1094,8 +1088,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, // Don't move pass calls, etc. return false; SmallVector<unsigned, 2> OtherDefs; - for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = OtherMI->getOperand(i); + for (const MachineOperand &MO : OtherMI->operands()) { if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); @@ -1155,13 +1148,68 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, return true; } -/// tryInstructionTransform - For the case where an instruction has a single -/// pair of tied register operands, attempt some transformations that may -/// either eliminate the tied operands or improve the opportunities for -/// coalescing away the register copy. Returns true if no copy needs to be -/// inserted to untie mi's operands (either because they were untied, or -/// because mi was rescheduled, and will be visited again later). If the -/// shouldOnlyCommute flag is true, only instruction commutation is attempted. 
+/// Tries to commute the operand 'BaseOpIdx' and some other operand in the +/// given machine instruction to improve opportunities for coalescing and +/// elimination of a register to register copy. +/// +/// 'DstOpIdx' specifies the index of MI def operand. +/// 'BaseOpKilled' specifies if the register associated with 'BaseOpIdx' +/// operand is killed by the given instruction. +/// The 'Dist' arguments provides the distance of MI from the start of the +/// current basic block and it is used to determine if it is profitable +/// to commute operands in the instruction. +/// +/// Returns true if the transformation happened. Otherwise, returns false. +bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, + unsigned DstOpIdx, + unsigned BaseOpIdx, + bool BaseOpKilled, + unsigned Dist) { + unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg(); + unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg(); + unsigned OpsNum = MI->getDesc().getNumOperands(); + unsigned OtherOpIdx = MI->getDesc().getNumDefs(); + for (; OtherOpIdx < OpsNum; OtherOpIdx++) { + // The call of findCommutedOpIndices below only checks if BaseOpIdx + // and OtherOpIdx are commutable, it does not really search for + // other commutable operands and does not change the values of passed + // variables. + if (OtherOpIdx == BaseOpIdx || + !TII->findCommutedOpIndices(MI, BaseOpIdx, OtherOpIdx)) + continue; + + unsigned OtherOpReg = MI->getOperand(OtherOpIdx).getReg(); + bool AggressiveCommute = false; + + // If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp + // operands. This makes the live ranges of DstOp and OtherOp joinable. + bool DoCommute = + !BaseOpKilled && isKilled(*MI, OtherOpReg, MRI, TII, LIS, false); + + if (!DoCommute && + isProfitableToCommute(DstOpReg, BaseOpReg, OtherOpReg, MI, Dist)) { + DoCommute = true; + AggressiveCommute = true; + } + + // If it's profitable to commute, try to do so. + if (DoCommute && commuteInstruction(MI, BaseOpIdx, OtherOpIdx, Dist)) { + ++NumCommuted; + if (AggressiveCommute) + ++NumAggrCommuted; + return true; + } + } + return false; +} + +/// For the case where an instruction has a single pair of tied register +/// operands, attempt some transformations that may either eliminate the tied +/// operands or improve the opportunities for coalescing away the register copy. +/// Returns true if no copy needs to be inserted to untie mi's operands +/// (either because they were untied, or because mi was rescheduled, and will +/// be visited again later). If the shouldOnlyCommute flag is true, only +/// instruction commutation is attempted. bool TwoAddressInstructionPass:: tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, @@ -1181,51 +1229,18 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, if (TargetRegisterInfo::isVirtualRegister(regA)) scanUses(regA); - // Check if it is profitable to commute the operands. - unsigned SrcOp1, SrcOp2; - unsigned regC = 0; - unsigned regCIdx = ~0U; - bool TryCommute = false; - bool AggressiveCommute = false; - if (MI.isCommutable() && MI.getNumOperands() >= 3 && - TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) { - if (SrcIdx == SrcOp1) - regCIdx = SrcOp2; - else if (SrcIdx == SrcOp2) - regCIdx = SrcOp1; - - if (regCIdx != ~0U) { - regC = MI.getOperand(regCIdx).getReg(); - if (!regBKilled && isKilled(MI, regC, MRI, TII, LIS, false)) - // If C dies but B does not, swap the B and C operands. - // This makes the live ranges of A and C joinable. 
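The new tryInstructionCommute above first tries a purely kill-driven commute: if some other commutable operand dies at the instruction while the tied one does not, swapping them makes the destination and the dying value joinable by the coalescer; only otherwise does it consult the profitability heuristic (the aggressive case). A standalone sketch of just that decision, with the heuristic reduced to a plain flag and the actual operand rewriting left out:

#include <cstdio>

struct CommuteDecision {
  bool DoCommute;
  bool Aggressive; // true when driven solely by the profitability heuristic
};

// Mirror of the ordering in tryInstructionCommute: kill information first,
// heuristic second.
static CommuteDecision decideCommute(bool BaseOpKilled, bool OtherOpKilled,
                                     bool HeuristicSaysProfitable) {
  if (!BaseOpKilled && OtherOpKilled)
    return {true, false};
  if (HeuristicSaysProfitable)
    return {true, true};
  return {false, false};
}

int main() {
  CommuteDecision D = decideCommute(/*BaseOpKilled=*/false,
                                    /*OtherOpKilled=*/true,
                                    /*HeuristicSaysProfitable=*/false);
  std::printf("commute=%d aggressive=%d\n", D.DoCommute, D.Aggressive); // 1 0
}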
- TryCommute = true; - else if (isProfitableToCommute(regA, regB, regC, &MI, Dist)) { - TryCommute = true; - AggressiveCommute = true; - } - } - } + bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist); // If the instruction is convertible to 3 Addr, instead // of returning try 3 Addr transformation aggresively and // use this variable to check later. Because it might be better. // For example, we can just use `leal (%rsi,%rdi), %eax` and `ret` // instead of the following code. - // addl %esi, %edi - // movl %edi, %eax + // addl %esi, %edi + // movl %edi, %eax // ret - bool Commuted = false; - - // If it's profitable to commute, try to do so. - if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) { - Commuted = true; - ++NumCommuted; - if (AggressiveCommute) - ++NumAggrCommuted; - if (!MI.isConvertibleTo3Addr()) - return false; - } + if (Commuted && !MI.isConvertibleTo3Addr()) + return false; if (shouldOnlyCommute) return false; @@ -1237,6 +1252,13 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, return true; } + // If we commuted, regB may have changed so we should re-sample it to avoid + // confusing the three address conversion below. + if (Commuted) { + regB = MI.getOperand(SrcIdx).getReg(); + regBKilled = isKilled(MI, regB, MRI, TII, LIS, true); + } + if (MI.isConvertibleTo3Addr()) { // This instruction is potentially convertible to a true // three-address instruction. Check if it is profitable. @@ -1348,10 +1370,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, SmallVector<unsigned, 4> OrigRegs; if (LIS) { - for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(), - MOE = MI.operands_end(); MOI != MOE; ++MOI) { - if (MOI->isReg()) - OrigRegs.push_back(MOI->getReg()); + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg()) + OrigRegs.push_back(MO.getReg()); } } @@ -1536,12 +1557,10 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, SrcRegMap[RegA] = RegB; } - if (AllUsesCopied) { if (!IsEarlyClobber) { // Replace other (un-tied) uses of regB with LastCopiedReg. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (MachineOperand &MO : MI->operands()) { if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB && MO.isUse()) { if (MO.isKill()) { @@ -1578,8 +1597,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, // regB is still used in this instruction, but a kill flag was // removed from a different tied use of regB, so now we need to add // a kill flag to one of the remaining uses of regB. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (MachineOperand &MO : MI->operands()) { if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { MO.setIsKill(true); break; @@ -1588,8 +1606,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } } -/// runOnMachineFunction - Reduce two-address instructions to two operands. -/// +/// Reduce two-address instructions to two operands. 
bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { MF = &Func; const TargetMachine &TM = MF->getTarget(); @@ -1599,7 +1616,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { InstrItins = MF->getSubtarget().getInstrItineraryData(); LV = getAnalysisIfAvailable<LiveVariables>(); LIS = getAnalysisIfAvailable<LiveIntervals>(); - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); OptLevel = TM.getOptLevel(); bool MadeChange = false; @@ -1614,7 +1631,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { TiedOperandMap TiedOperands; for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { - MBB = MBBI; + MBB = &*MBBI; unsigned Dist = 0; DistanceMap.clear(); SrcRegMap.clear(); @@ -1661,8 +1678,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { unsigned DstReg = mi->getOperand(DstIdx).getReg(); if (SrcReg != DstReg && tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) { - // The tied operands have been eliminated or shifted further down the - // block to ease elimination. Continue processing with 'nmi'. + // The tied operands have been eliminated or shifted further down + // the block to ease elimination. Continue processing with 'nmi'. TiedOperands.clear(); mi = nmi; continue; @@ -1671,9 +1688,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { } // Now iterate over the information collected above. - for (TiedOperandMap::iterator OI = TiedOperands.begin(), - OE = TiedOperands.end(); OI != OE; ++OI) { - processTiedPairs(mi, OI->second, Dist); + for (auto &TO : TiedOperands) { + processTiedPairs(mi, TO.second, Dist); DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); } diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp index d393e103104d..8c9631e435bf 100644 --- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -71,8 +71,8 @@ bool UnreachableBlockElim::runOnFunction(Function &F) { // in them. std::vector<BasicBlock*> DeadBlocks; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - if (!Reachable.count(I)) { - BasicBlock *BB = I; + if (!Reachable.count(&*I)) { + BasicBlock *BB = &*I; DeadBlocks.push_back(BB); while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { PN->replaceAllUsesWith(Constant::getNullValue(PN->getType())); @@ -131,7 +131,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { // in them. std::vector<MachineBasicBlock*> DeadBlocks; for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { - MachineBasicBlock *BB = I; + MachineBasicBlock *BB = &*I; // Test for deadness. if (!Reachable.count(BB)) { @@ -167,7 +167,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { // Cleanup PHI nodes. for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { - MachineBasicBlock *BB = I; + MachineBasicBlock *BB = &*I; // Prune unneeded PHI entries. 
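UnreachableBlockElim above is a mark-then-sweep: walk everything reachable from the entry block, then delete whatever was never marked (after detaching PHI uses so nothing dangles). A standalone sketch of the marking half on a plain adjacency-list graph, leaving the PHI bookkeeping out:

#include <cstdio>
#include <vector>

// Return, for every node, whether it is reachable from 'Entry'.
static std::vector<bool>
markReachable(const std::vector<std::vector<int>> &Succ, int Entry) {
  std::vector<bool> Seen(Succ.size(), false);
  std::vector<int> Work{Entry};
  Seen[Entry] = true;
  while (!Work.empty()) {
    int N = Work.back();
    Work.pop_back();
    for (int S : Succ[N])
      if (!Seen[S]) {
        Seen[S] = true;
        Work.push_back(S);
      }
  }
  return Seen;
}

int main() {
  // 0 -> 1 -> 2; node 3 also branches to 2 but nothing reaches node 3.
  std::vector<std::vector<int>> Succ = {{1}, {2}, {}, {2}};
  std::vector<bool> Seen = markReachable(Succ, 0);
  for (unsigned I = 0; I != Seen.size(); ++I)
    if (!Seen[I])
      std::printf("block %u would be deleted\n", I); // block 3 would be deleted
}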
SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(), BB->pred_end()); diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp index 2912bdd63426..bf1c0dce9e56 100644 --- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp @@ -163,11 +163,12 @@ class VirtRegRewriter : public MachineFunctionPass { SlotIndexes *Indexes; LiveIntervals *LIS; VirtRegMap *VRM; - SparseSet<unsigned> PhysRegs; void rewrite(); void addMBBLiveIns(); bool readsUndefSubreg(const MachineOperand &MO) const; + void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const; + public: static char ID; VirtRegRewriter() : MachineFunctionPass(ID) {} @@ -237,10 +238,52 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { return true; } +void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI, + unsigned PhysReg) const { + assert(!LI.empty()); + assert(LI.hasSubRanges()); + + typedef std::pair<const LiveInterval::SubRange *, + LiveInterval::const_iterator> SubRangeIteratorPair; + SmallVector<SubRangeIteratorPair, 4> SubRanges; + SlotIndex First; + SlotIndex Last; + for (const LiveInterval::SubRange &SR : LI.subranges()) { + SubRanges.push_back(std::make_pair(&SR, SR.begin())); + if (!First.isValid() || SR.segments.front().start < First) + First = SR.segments.front().start; + if (!Last.isValid() || SR.segments.back().end > Last) + Last = SR.segments.back().end; + } + + // Check all mbb start positions between First and Last while + // simulatenously advancing an iterator for each subrange. + for (SlotIndexes::MBBIndexIterator MBBI = Indexes->findMBBIndex(First); + MBBI != Indexes->MBBIndexEnd() && MBBI->first <= Last; ++MBBI) { + SlotIndex MBBBegin = MBBI->first; + // Advance all subrange iterators so that their end position is just + // behind MBBBegin (or the iterator is at the end). + LaneBitmask LaneMask = 0; + for (auto &RangeIterPair : SubRanges) { + const LiveInterval::SubRange *SR = RangeIterPair.first; + LiveInterval::const_iterator &SRI = RangeIterPair.second; + while (SRI != SR->end() && SRI->end <= MBBBegin) + ++SRI; + if (SRI == SR->end()) + continue; + if (SRI->start <= MBBBegin) + LaneMask |= SR->LaneMask; + } + if (LaneMask == 0) + continue; + MachineBasicBlock *MBB = MBBI->second; + MBB->addLiveIn(PhysReg, LaneMask); + } +} + // Compute MBB live-in lists from virtual register live ranges and their // assignments. void VirtRegRewriter::addMBBLiveIns() { - SmallVector<MachineBasicBlock*, 16> LiveIn; for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) { unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx); if (MRI->reg_nodbg_empty(VirtReg)) @@ -254,31 +297,18 @@ void VirtRegRewriter::addMBBLiveIns() { assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register."); if (LI.hasSubRanges()) { - for (LiveInterval::SubRange &S : LI.subranges()) { - for (const auto &Seg : S.segments) { - if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn)) - continue; - for (MCSubRegIndexIterator SR(PhysReg, TRI); SR.isValid(); ++SR) { - unsigned SubReg = SR.getSubReg(); - unsigned SubRegIndex = SR.getSubRegIndex(); - unsigned SubRegLaneMask = TRI->getSubRegIndexLaneMask(SubRegIndex); - if ((SubRegLaneMask & S.LaneMask) == 0) - continue; - for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) { - LiveIn[i]->addLiveIn(SubReg); - } - } - LiveIn.clear(); - } - } + addLiveInsForSubRanges(LI, PhysReg); } else { - // Scan the segments of LI. 
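addLiveInsForSubRanges above walks the sorted list of basic-block start indices once while advancing one iterator per sub-range in lockstep, OR-ing together the lane masks of every sub-range that covers a given block start. A standalone sketch of that sweep over plain integer segments (segments are half-open [start, end) pairs and lane masks are ordinary unsigned values; none of this uses the LLVM types):

#include <cstdio>
#include <utility>
#include <vector>

struct SubRange {
  unsigned LaneMask;
  std::vector<std::pair<int, int>> Segments; // sorted, half-open [start, end)
};

// For each block start position, compute the union of lane masks of the
// sub-ranges live across it. Both the segment lists and the block starts are
// sorted, so every segment list is walked at most once overall.
static void sweepLiveIns(const std::vector<int> &BlockStarts,
                         const std::vector<SubRange> &Ranges) {
  std::vector<unsigned> Cursor(Ranges.size(), 0);
  for (int Begin : BlockStarts) {
    unsigned Mask = 0;
    for (unsigned R = 0; R != Ranges.size(); ++R) {
      const auto &Segs = Ranges[R].Segments;
      unsigned &I = Cursor[R];
      while (I != Segs.size() && Segs[I].second <= Begin)
        ++I;                        // drop segments ending before this block
      if (I != Segs.size() && Segs[I].first <= Begin)
        Mask |= Ranges[R].LaneMask; // segment covers the block entry
    }
    if (Mask)
      std::printf("block at %d: live-in mask %#x\n", Begin, Mask);
  }
}

int main() {
  std::vector<SubRange> Ranges = {{0x1, {{0, 12}}}, {0x2, {{4, 20}}}};
  sweepLiveIns({0, 8, 16}, Ranges); // masks 0x1, 0x3, 0x2
}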
- for (const auto &Seg : LI.segments) { - if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn)) - continue; - for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) - LiveIn[i]->addLiveIn(PhysReg); - LiveIn.clear(); + // Go over MBB begin positions and see if we have segments covering them. + // The following works because segments and the MBBIndex list are both + // sorted by slot indexes. + SlotIndexes::MBBIndexIterator I = Indexes->MBBIndexBegin(); + for (const auto &Seg : LI) { + I = Indexes->advanceMBBIndex(I, Seg.start); + for (; I != Indexes->MBBIndexEnd() && I->first < Seg.end; ++I) { + MachineBasicBlock *MBB = I->second; + MBB->addLiveIn(PhysReg); + } } } } @@ -305,7 +335,7 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const { assert(LI.liveAt(BaseIndex) && "Reads of completely dead register should be marked undef already"); unsigned SubRegIdx = MO.getSubReg(); - unsigned UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx); + LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx); // See if any of the relevant subregister liveranges is defined at this point. for (const LiveInterval::SubRange &SR : LI.subranges()) { if ((SR.LaneMask & UseMask) != 0 && SR.liveAt(BaseIndex)) @@ -319,54 +349,15 @@ void VirtRegRewriter::rewrite() { SmallVector<unsigned, 8> SuperDeads; SmallVector<unsigned, 8> SuperDefs; SmallVector<unsigned, 8> SuperKills; - SmallPtrSet<const MachineInstr *, 4> NoReturnInsts; - - // Here we have a SparseSet to hold which PhysRegs are actually encountered - // in the MF we are about to iterate over so that later when we call - // setPhysRegUsed, we are only doing it for physRegs that were actually found - // in the program and not for all of the possible physRegs for the given - // target architecture. If the target has a lot of physRegs, then for a small - // program there will be a significant compile time reduction here. - PhysRegs.clear(); - PhysRegs.setUniverse(TRI->getNumRegs()); - - // The function with uwtable should guarantee that the stack unwinder - // can unwind the stack to the previous frame. Thus, we can't apply the - // noreturn optimization if the caller function has uwtable attribute. - bool HasUWTable = MF->getFunction()->hasFnAttribute(Attribute::UWTable); for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { DEBUG(MBBI->print(dbgs(), Indexes)); - bool IsExitBB = MBBI->succ_empty(); for (MachineBasicBlock::instr_iterator MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) { - MachineInstr *MI = MII; + MachineInstr *MI = &*MII; ++MII; - // Check if this instruction is a call to a noreturn function. If this - // is a call to noreturn function and we don't need the stack unwinding - // functionality (i.e. this function does not have uwtable attribute and - // the callee function has the nounwind attribute), then we can ignore - // the definitions set by this instruction. - if (!HasUWTable && IsExitBB && MI->isCall()) { - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { - MachineOperand &MO = *MOI; - if (!MO.isGlobal()) - continue; - const Function *Func = dyn_cast<Function>(MO.getGlobal()); - if (!Func || !Func->hasFnAttribute(Attribute::NoReturn) || - // We need to keep correct unwind information - // even if the function will not return, since the - // runtime may need it. 
- !Func->hasFnAttribute(Attribute::NoUnwind)) - continue; - NoReturnInsts.insert(MI); - break; - } - } - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), MOE = MI->operands_end(); MOI != MOE; ++MOI) { MachineOperand &MO = *MOI; @@ -375,15 +366,6 @@ void VirtRegRewriter::rewrite() { if (MO.isRegMask()) MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); - // If we encounter a VirtReg or PhysReg then get at the PhysReg and add - // it to the physreg bitset. Later we use only the PhysRegs that were - // actually encountered in the MF to populate the MRI's used physregs. - if (MO.isReg() && MO.getReg()) - PhysRegs.insert( - TargetRegisterInfo::isVirtualRegister(MO.getReg()) ? - VRM->getPhys(MO.getReg()) : - MO.getReg()); - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; unsigned VirtReg = MO.getReg(); @@ -418,14 +400,6 @@ void VirtRegRewriter::rewrite() { MO.setIsUndef(true); } else if (!MO.isDead()) { assert(MO.isDef()); - // Things get tricky when we ran out of lane mask bits and - // merged multiple lanes into the overflow bit: In this case - // our subregister liveness tracking isn't precise and we can't - // know what subregister parts are undefined, fall back to the - // implicit super-register def then. - unsigned LaneMask = TRI->getSubRegIndexLaneMask(SubReg); - if (TargetRegisterInfo::isImpreciseLaneMask(LaneMask)) - SuperDefs.push_back(PhysReg); } } @@ -470,29 +444,5 @@ void VirtRegRewriter::rewrite() { } } } - - // Tell MRI about physical registers in use. - if (NoReturnInsts.empty()) { - for (SparseSet<unsigned>::iterator - RegI = PhysRegs.begin(), E = PhysRegs.end(); RegI != E; ++RegI) - if (!MRI->reg_nodbg_empty(*RegI)) - MRI->setPhysRegUsed(*RegI); - } else { - for (SparseSet<unsigned>::iterator - I = PhysRegs.begin(), E = PhysRegs.end(); I != E; ++I) { - unsigned Reg = *I; - if (MRI->reg_nodbg_empty(Reg)) - continue; - // Check if this register has a use that will impact the rest of the - // code. Uses in debug and noreturn instructions do not impact the - // generated code. 
- for (MachineInstr &It : MRI->reg_nodbg_instructions(Reg)) { - if (!NoReturnInsts.count(&It)) { - MRI->setPhysRegUsed(Reg); - break; - } - } - } - } } diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp index 0d26ed333ca7..52fb922c935a 100644 --- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -18,66 +18,40 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Triple.h" -#include "llvm/ADT/TinyPtrVector.h" -#include "llvm/Analysis/LibCallSemantics.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/WinEHFuncInfo.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/PatternMatch.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include <memory> +#include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; -using namespace llvm::PatternMatch; #define DEBUG_TYPE "winehprepare" -namespace { - -// This map is used to model frame variable usage during outlining, to -// construct a structure type to hold the frame variables in a frame -// allocation block, and to remap the frame variable allocas (including -// spill locations as needed) to GEPs that get the variable from the -// frame allocation structure. -typedef MapVector<Value *, TinyPtrVector<AllocaInst *>> FrameVarInfoMap; - -// TinyPtrVector cannot hold nullptr, so we need our own sentinel that isn't -// quite null. -AllocaInst *getCatchObjectSentinel() { - return static_cast<AllocaInst *>(nullptr) + 1; -} - -typedef SmallSet<BasicBlock *, 4> VisitedBlockSet; +static cl::opt<bool> DisableDemotion( + "disable-demotion", cl::Hidden, + cl::desc( + "Clone multicolor basic blocks but do not demote cross funclet values"), + cl::init(false)); -class LandingPadActions; -class LandingPadMap; - -typedef DenseMap<const BasicBlock *, CatchHandler *> CatchHandlerMapTy; -typedef DenseMap<const BasicBlock *, CleanupHandler *> CleanupHandlerMapTy; +static cl::opt<bool> DisableCleanups( + "disable-cleanups", cl::Hidden, + cl::desc("Do not remove implausible terminators or other similar cleanups"), + cl::init(false)); +namespace { + class WinEHPrepare : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid. 
- WinEHPrepare(const TargetMachine *TM = nullptr) - : FunctionPass(ID) { - if (TM) - TheTriple = TM->getTargetTriple(); - } + WinEHPrepare(const TargetMachine *TM = nullptr) : FunctionPass(ID) {} bool runOnFunction(Function &Fn) override; @@ -90,264 +64,27 @@ public: } private: - bool prepareExceptionHandlers(Function &F, - SmallVectorImpl<LandingPadInst *> &LPads); - void identifyEHBlocks(Function &F, SmallVectorImpl<LandingPadInst *> &LPads); - void promoteLandingPadValues(LandingPadInst *LPad); - void demoteValuesLiveAcrossHandlers(Function &F, - SmallVectorImpl<LandingPadInst *> &LPads); - void findSEHEHReturnPoints(Function &F, - SetVector<BasicBlock *> &EHReturnBlocks); - void findCXXEHReturnPoints(Function &F, - SetVector<BasicBlock *> &EHReturnBlocks); - void getPossibleReturnTargets(Function *ParentF, Function *HandlerF, - SetVector<BasicBlock*> &Targets); - void completeNestedLandingPad(Function *ParentFn, - LandingPadInst *OutlinedLPad, - const LandingPadInst *OriginalLPad, - FrameVarInfoMap &VarInfo); - Function *createHandlerFunc(Function *ParentFn, Type *RetTy, - const Twine &Name, Module *M, Value *&ParentFP); - bool outlineHandler(ActionHandler *Action, Function *SrcFn, - LandingPadInst *LPad, BasicBlock *StartBB, - FrameVarInfoMap &VarInfo); - void addStubInvokeToHandlerIfNeeded(Function *Handler); - - void mapLandingPadBlocks(LandingPadInst *LPad, LandingPadActions &Actions); - CatchHandler *findCatchHandler(BasicBlock *BB, BasicBlock *&NextBB, - VisitedBlockSet &VisitedBlocks); - void findCleanupHandlers(LandingPadActions &Actions, BasicBlock *StartBB, - BasicBlock *EndBB); - - void processSEHCatchHandler(CatchHandler *Handler, BasicBlock *StartBB); - - Triple TheTriple; + void insertPHIStores(PHINode *OriginalPHI, AllocaInst *SpillSlot); + void + insertPHIStore(BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot, + SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist); + AllocaInst *insertPHILoads(PHINode *PN, Function &F); + void replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot, + DenseMap<BasicBlock *, Value *> &Loads, Function &F); + bool prepareExplicitEH(Function &F); + void colorFunclets(Function &F); + + void demotePHIsOnFunclets(Function &F); + void cloneCommonBlocks(Function &F); + void removeImplausibleInstructions(Function &F); + void cleanupPreparedFunclets(Function &F); + void verifyPreparedFunclets(Function &F); // All fields are reset by runOnFunction. - DominatorTree *DT = nullptr; - const TargetLibraryInfo *LibInfo = nullptr; EHPersonality Personality = EHPersonality::Unknown; - CatchHandlerMapTy CatchHandlerMap; - CleanupHandlerMapTy CleanupHandlerMap; - DenseMap<const LandingPadInst *, LandingPadMap> LPadMaps; - SmallPtrSet<BasicBlock *, 4> NormalBlocks; - SmallPtrSet<BasicBlock *, 4> EHBlocks; - SetVector<BasicBlock *> EHReturnBlocks; - - // This maps landing pad instructions found in outlined handlers to - // the landing pad instruction in the parent function from which they - // were cloned. The cloned/nested landing pad is used as the key - // because the landing pad may be cloned into multiple handlers. - // This map will be used to add the llvm.eh.actions call to the nested - // landing pads after all handlers have been outlined. - DenseMap<LandingPadInst *, const LandingPadInst *> NestedLPtoOriginalLP; - - // This maps blocks in the parent function which are destinations of - // catch handlers to cloned blocks in (other) outlined handlers. 
This - // handles the case where a nested landing pads has a catch handler that - // returns to a handler function rather than the parent function. - // The original block is used as the key here because there should only - // ever be one handler function from which the cloned block is not pruned. - // The original block will be pruned from the parent function after all - // handlers have been outlined. This map will be used to adjust the - // return instructions of handlers which return to the block that was - // outlined into a handler. This is done after all handlers have been - // outlined but before the outlined code is pruned from the parent function. - DenseMap<const BasicBlock *, BasicBlock *> LPadTargetBlocks; - - // Map from outlined handler to call to parent local address. Only used for - // 32-bit EH. - DenseMap<Function *, Value *> HandlerToParentFP; - - AllocaInst *SEHExceptionCodeSlot = nullptr; -}; - -class WinEHFrameVariableMaterializer : public ValueMaterializer { -public: - WinEHFrameVariableMaterializer(Function *OutlinedFn, Value *ParentFP, - FrameVarInfoMap &FrameVarInfo); - ~WinEHFrameVariableMaterializer() override {} - - Value *materializeValueFor(Value *V) override; - - void escapeCatchObject(Value *V); - -private: - FrameVarInfoMap &FrameVarInfo; - IRBuilder<> Builder; -}; - -class LandingPadMap { -public: - LandingPadMap() : OriginLPad(nullptr) {} - void mapLandingPad(const LandingPadInst *LPad); - - bool isInitialized() { return OriginLPad != nullptr; } - - bool isOriginLandingPadBlock(const BasicBlock *BB) const; - bool isLandingPadSpecificInst(const Instruction *Inst) const; - - void remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue, - Value *SelectorValue) const; - -private: - const LandingPadInst *OriginLPad; - // We will normally only see one of each of these instructions, but - // if more than one occurs for some reason we can handle that. 
- TinyPtrVector<const ExtractValueInst *> ExtractedEHPtrs; - TinyPtrVector<const ExtractValueInst *> ExtractedSelectors; -}; - -class WinEHCloningDirectorBase : public CloningDirector { -public: - WinEHCloningDirectorBase(Function *HandlerFn, Value *ParentFP, - FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) - : Materializer(HandlerFn, ParentFP, VarInfo), - SelectorIDType(Type::getInt32Ty(HandlerFn->getContext())), - Int8PtrType(Type::getInt8PtrTy(HandlerFn->getContext())), - LPadMap(LPadMap), ParentFP(ParentFP) {} - - CloningAction handleInstruction(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) override; - - virtual CloningAction handleBeginCatch(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) = 0; - virtual CloningAction handleEndCatch(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) = 0; - virtual CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) = 0; - virtual CloningAction handleIndirectBr(ValueToValueMapTy &VMap, - const IndirectBrInst *IBr, - BasicBlock *NewBB) = 0; - virtual CloningAction handleInvoke(ValueToValueMapTy &VMap, - const InvokeInst *Invoke, - BasicBlock *NewBB) = 0; - virtual CloningAction handleResume(ValueToValueMapTy &VMap, - const ResumeInst *Resume, - BasicBlock *NewBB) = 0; - virtual CloningAction handleCompare(ValueToValueMapTy &VMap, - const CmpInst *Compare, - BasicBlock *NewBB) = 0; - virtual CloningAction handleLandingPad(ValueToValueMapTy &VMap, - const LandingPadInst *LPad, - BasicBlock *NewBB) = 0; - - ValueMaterializer *getValueMaterializer() override { return &Materializer; } - -protected: - WinEHFrameVariableMaterializer Materializer; - Type *SelectorIDType; - Type *Int8PtrType; - LandingPadMap &LPadMap; - - /// The value representing the parent frame pointer. 
- Value *ParentFP; -}; - -class WinEHCatchDirector : public WinEHCloningDirectorBase { -public: - WinEHCatchDirector( - Function *CatchFn, Value *ParentFP, Value *Selector, - FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap, - DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPads, - DominatorTree *DT, SmallPtrSetImpl<BasicBlock *> &EHBlocks) - : WinEHCloningDirectorBase(CatchFn, ParentFP, VarInfo, LPadMap), - CurrentSelector(Selector->stripPointerCasts()), - ExceptionObjectVar(nullptr), NestedLPtoOriginalLP(NestedLPads), - DT(DT), EHBlocks(EHBlocks) {} - - CloningAction handleBeginCatch(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleIndirectBr(ValueToValueMapTy &VMap, - const IndirectBrInst *IBr, - BasicBlock *NewBB) override; - CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke, - BasicBlock *NewBB) override; - CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, - BasicBlock *NewBB) override; - CloningAction handleCompare(ValueToValueMapTy &VMap, const CmpInst *Compare, - BasicBlock *NewBB) override; - CloningAction handleLandingPad(ValueToValueMapTy &VMap, - const LandingPadInst *LPad, - BasicBlock *NewBB) override; - - Value *getExceptionVar() { return ExceptionObjectVar; } - TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; } - -private: - Value *CurrentSelector; - - Value *ExceptionObjectVar; - TinyPtrVector<BasicBlock *> ReturnTargets; - // This will be a reference to the field of the same name in the WinEHPrepare - // object which instantiates this WinEHCatchDirector object. 
- DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPtoOriginalLP; - DominatorTree *DT; - SmallPtrSetImpl<BasicBlock *> &EHBlocks; -}; - -class WinEHCleanupDirector : public WinEHCloningDirectorBase { -public: - WinEHCleanupDirector(Function *CleanupFn, Value *ParentFP, - FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) - : WinEHCloningDirectorBase(CleanupFn, ParentFP, VarInfo, - LPadMap) {} - - CloningAction handleBeginCatch(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleIndirectBr(ValueToValueMapTy &VMap, - const IndirectBrInst *IBr, - BasicBlock *NewBB) override; - CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke, - BasicBlock *NewBB) override; - CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, - BasicBlock *NewBB) override; - CloningAction handleCompare(ValueToValueMapTy &VMap, const CmpInst *Compare, - BasicBlock *NewBB) override; - CloningAction handleLandingPad(ValueToValueMapTy &VMap, - const LandingPadInst *LPad, - BasicBlock *NewBB) override; -}; - -class LandingPadActions { -public: - LandingPadActions() : HasCleanupHandlers(false) {} - - void insertCatchHandler(CatchHandler *Action) { Actions.push_back(Action); } - void insertCleanupHandler(CleanupHandler *Action) { - Actions.push_back(Action); - HasCleanupHandlers = true; - } - - bool includesCleanup() const { return HasCleanupHandlers; } - - SmallVectorImpl<ActionHandler *> &actions() { return Actions; } - SmallVectorImpl<ActionHandler *>::iterator begin() { return Actions.begin(); } - SmallVectorImpl<ActionHandler *>::iterator end() { return Actions.end(); } - -private: - // Note that this class does not own the ActionHandler objects in this vector. - // The ActionHandlers are owned by the CatchHandlerMap and CleanupHandlerMap - // in the WinEHPrepare class. - SmallVector<ActionHandler *, 4> Actions; - bool HasCleanupHandlers; + DenseMap<BasicBlock *, ColorVector> BlockColors; + MapVector<BasicBlock *, std::vector<BasicBlock *>> FuncletBlocks; }; } // end anonymous namespace @@ -361,2536 +98,987 @@ FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) { } bool WinEHPrepare::runOnFunction(Function &Fn) { - // No need to prepare outlined handlers. - if (Fn.hasFnAttribute("wineh-parent")) - return false; - - SmallVector<LandingPadInst *, 4> LPads; - SmallVector<ResumeInst *, 4> Resumes; - for (BasicBlock &BB : Fn) { - if (auto *LP = BB.getLandingPadInst()) - LPads.push_back(LP); - if (auto *Resume = dyn_cast<ResumeInst>(BB.getTerminator())) - Resumes.push_back(Resume); - } - - // No need to prepare functions that lack landing pads. - if (LPads.empty()) + if (!Fn.hasPersonalityFn()) return false; // Classify the personality to see what kind of preparation we need. Personality = classifyEHPersonality(Fn.getPersonalityFn()); - // Do nothing if this is not an MSVC personality. - if (!isMSVCEHPersonality(Personality)) + // Do nothing if this is not a funclet-based personality. + if (!isFuncletEHPersonality(Personality)) return false; - DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - - // If there were any landing pads, prepareExceptionHandlers will make changes. 
- prepareExceptionHandlers(Fn, LPads); - return true; + return prepareExplicitEH(Fn); } bool WinEHPrepare::doFinalization(Module &M) { return false; } -void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); -} - -static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler, - Constant *&Selector, BasicBlock *&NextBB); - -// Finds blocks reachable from the starting set Worklist. Does not follow unwind -// edges or blocks listed in StopPoints. -static void findReachableBlocks(SmallPtrSetImpl<BasicBlock *> &ReachableBBs, - SetVector<BasicBlock *> &Worklist, - const SetVector<BasicBlock *> *StopPoints) { - while (!Worklist.empty()) { - BasicBlock *BB = Worklist.pop_back_val(); +void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {} - // Don't cross blocks that we should stop at. - if (StopPoints && StopPoints->count(BB)) - continue; - - if (!ReachableBBs.insert(BB).second) - continue; // Already visited. - - // Don't follow unwind edges of invokes. - if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) { - Worklist.insert(II->getNormalDest()); - continue; - } - - // Otherwise, follow all successors. - Worklist.insert(succ_begin(BB), succ_end(BB)); - } -} - -// Attempt to find an instruction where a block can be split before -// a call to llvm.eh.begincatch and its operands. If the block -// begins with the begincatch call or one of its adjacent operands -// the block will not be split. -static Instruction *findBeginCatchSplitPoint(BasicBlock *BB, - IntrinsicInst *II) { - // If the begincatch call is already the first instruction in the block, - // don't split. - Instruction *FirstNonPHI = BB->getFirstNonPHI(); - if (II == FirstNonPHI) - return nullptr; - - // If either operand is in the same basic block as the instruction and - // isn't used by another instruction before the begincatch call, include it - // in the split block. - auto *Op0 = dyn_cast<Instruction>(II->getOperand(0)); - auto *Op1 = dyn_cast<Instruction>(II->getOperand(1)); - - Instruction *I = II->getPrevNode(); - Instruction *LastI = II; - - while (I == Op0 || I == Op1) { - // If the block begins with one of the operands and there are no other - // instructions between the operand and the begincatch call, don't split. - if (I == FirstNonPHI) - return nullptr; - - LastI = I; - I = I->getPrevNode(); - } - - // If there is at least one instruction in the block before the begincatch - // call and its operands, split the block at either the begincatch or - // its operand. - return LastI; +static int addUnwindMapEntry(WinEHFuncInfo &FuncInfo, int ToState, + const BasicBlock *BB) { + CxxUnwindMapEntry UME; + UME.ToState = ToState; + UME.Cleanup = BB; + FuncInfo.CxxUnwindMap.push_back(UME); + return FuncInfo.getLastStateNumber(); } -/// Find all points where exceptional control rejoins normal control flow via -/// llvm.eh.endcatch. Add them to the normal bb reachability worklist. -void WinEHPrepare::findCXXEHReturnPoints( - Function &F, SetVector<BasicBlock *> &EHReturnBlocks) { - for (auto BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { - BasicBlock *BB = BBI; - for (Instruction &I : *BB) { - if (match(&I, m_Intrinsic<Intrinsic::eh_begincatch>())) { - Instruction *SplitPt = - findBeginCatchSplitPoint(BB, cast<IntrinsicInst>(&I)); - if (SplitPt) { - // Split the block before the llvm.eh.begincatch call to allow - // cleanup and catch code to be distinguished later. 
- // Do not update BBI because we still need to process the - // portion of the block that we are splitting off. - SplitBlock(BB, SplitPt, DT); - break; - } - } - if (match(&I, m_Intrinsic<Intrinsic::eh_endcatch>())) { - // Split the block after the call to llvm.eh.endcatch if there is - // anything other than an unconditional branch, or if the successor - // starts with a phi. - auto *Br = dyn_cast<BranchInst>(I.getNextNode()); - if (!Br || !Br->isUnconditional() || - isa<PHINode>(Br->getSuccessor(0)->begin())) { - DEBUG(dbgs() << "splitting block " << BB->getName() - << " with llvm.eh.endcatch\n"); - BBI = SplitBlock(BB, I.getNextNode(), DT); - } - // The next BB is normal control flow. - EHReturnBlocks.insert(BB->getTerminator()->getSuccessor(0)); - break; - } - } +static void addTryBlockMapEntry(WinEHFuncInfo &FuncInfo, int TryLow, + int TryHigh, int CatchHigh, + ArrayRef<const CatchPadInst *> Handlers) { + WinEHTryBlockMapEntry TBME; + TBME.TryLow = TryLow; + TBME.TryHigh = TryHigh; + TBME.CatchHigh = CatchHigh; + assert(TBME.TryLow <= TBME.TryHigh); + for (const CatchPadInst *CPI : Handlers) { + WinEHHandlerType HT; + Constant *TypeInfo = cast<Constant>(CPI->getArgOperand(0)); + if (TypeInfo->isNullValue()) + HT.TypeDescriptor = nullptr; + else + HT.TypeDescriptor = cast<GlobalVariable>(TypeInfo->stripPointerCasts()); + HT.Adjectives = cast<ConstantInt>(CPI->getArgOperand(1))->getZExtValue(); + HT.Handler = CPI->getParent(); + if (isa<ConstantPointerNull>(CPI->getArgOperand(2))) + HT.CatchObj.Alloca = nullptr; + else + HT.CatchObj.Alloca = cast<AllocaInst>(CPI->getArgOperand(2)); + TBME.HandlerArray.push_back(HT); } + FuncInfo.TryBlockMap.push_back(TBME); } -static bool isCatchAllLandingPad(const BasicBlock *BB) { - const LandingPadInst *LP = BB->getLandingPadInst(); - if (!LP) - return false; - unsigned N = LP->getNumClauses(); - return (N > 0 && LP->isCatch(N - 1) && - isa<ConstantPointerNull>(LP->getClause(N - 1))); +static BasicBlock *getCleanupRetUnwindDest(const CleanupPadInst *CleanupPad) { + for (const User *U : CleanupPad->users()) + if (const auto *CRI = dyn_cast<CleanupReturnInst>(U)) + return CRI->getUnwindDest(); + return nullptr; } -/// Find all points where exceptions control rejoins normal control flow via -/// selector dispatch. -void WinEHPrepare::findSEHEHReturnPoints( - Function &F, SetVector<BasicBlock *> &EHReturnBlocks) { - for (auto BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { - BasicBlock *BB = BBI; - // If the landingpad is a catch-all, treat the whole lpad as if it is - // reachable from normal control flow. - // FIXME: This is imprecise. We need a better way of identifying where a - // catch-all starts and cleanups stop. As far as LLVM is concerned, there - // is no difference. - if (isCatchAllLandingPad(BB)) { - EHReturnBlocks.insert(BB); +static void calculateStateNumbersForInvokes(const Function *Fn, + WinEHFuncInfo &FuncInfo) { + auto *F = const_cast<Function *>(Fn); + DenseMap<BasicBlock *, ColorVector> BlockColors = colorEHFunclets(*F); + for (BasicBlock &BB : *F) { + auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); + if (!II) continue; - } - - BasicBlock *CatchHandler; - BasicBlock *NextBB; - Constant *Selector; - if (isSelectorDispatch(BB, CatchHandler, Selector, NextBB)) { - // Split the edge if there are multiple predecessors. This creates a place - // where we can insert EH recovery code. 
- if (!CatchHandler->getSinglePredecessor()) { - DEBUG(dbgs() << "splitting EH return edge from " << BB->getName() - << " to " << CatchHandler->getName() << '\n'); - BBI = CatchHandler = SplitCriticalEdge( - BB, std::find(succ_begin(BB), succ_end(BB), CatchHandler)); - } - EHReturnBlocks.insert(CatchHandler); - } - } -} -void WinEHPrepare::identifyEHBlocks(Function &F, - SmallVectorImpl<LandingPadInst *> &LPads) { - DEBUG(dbgs() << "Demoting values live across exception handlers in function " - << F.getName() << '\n'); - - // Build a set of all non-exceptional blocks and exceptional blocks. - // - Non-exceptional blocks are blocks reachable from the entry block while - // not following invoke unwind edges. - // - Exceptional blocks are blocks reachable from landingpads. Analysis does - // not follow llvm.eh.endcatch blocks, which mark a transition from - // exceptional to normal control. - - if (Personality == EHPersonality::MSVC_CXX) - findCXXEHReturnPoints(F, EHReturnBlocks); - else - findSEHEHReturnPoints(F, EHReturnBlocks); - - DEBUG({ - dbgs() << "identified the following blocks as EH return points:\n"; - for (BasicBlock *BB : EHReturnBlocks) - dbgs() << " " << BB->getName() << '\n'; - }); - -// Join points should not have phis at this point, unless they are a -// landingpad, in which case we will demote their phis later. -#ifndef NDEBUG - for (BasicBlock *BB : EHReturnBlocks) - assert((BB->isLandingPad() || !isa<PHINode>(BB->begin())) && - "non-lpad EH return block has phi"); -#endif - - // Normal blocks are the blocks reachable from the entry block and all EH - // return points. - SetVector<BasicBlock *> Worklist; - Worklist = EHReturnBlocks; - Worklist.insert(&F.getEntryBlock()); - findReachableBlocks(NormalBlocks, Worklist, nullptr); - DEBUG({ - dbgs() << "marked the following blocks as normal:\n"; - for (BasicBlock *BB : NormalBlocks) - dbgs() << " " << BB->getName() << '\n'; - }); - - // Exceptional blocks are the blocks reachable from landingpads that don't - // cross EH return points. - Worklist.clear(); - for (auto *LPI : LPads) - Worklist.insert(LPI->getParent()); - findReachableBlocks(EHBlocks, Worklist, &EHReturnBlocks); - DEBUG({ - dbgs() << "marked the following blocks as exceptional:\n"; - for (BasicBlock *BB : EHBlocks) - dbgs() << " " << BB->getName() << '\n'; - }); - -} - -/// Ensure that all values live into and out of exception handlers are stored -/// in memory. -/// FIXME: This falls down when values are defined in one handler and live into -/// another handler. For example, a cleanup defines a value used only by a -/// catch handler. -void WinEHPrepare::demoteValuesLiveAcrossHandlers( - Function &F, SmallVectorImpl<LandingPadInst *> &LPads) { - DEBUG(dbgs() << "Demoting values live across exception handlers in function " - << F.getName() << '\n'); - - // identifyEHBlocks() should have been called before this function. - assert(!NormalBlocks.empty()); - - // Try to avoid demoting EH pointer and selector values. They get in the way - // of our pattern matching. 
- SmallPtrSet<Instruction *, 10> EHVals; - for (BasicBlock &BB : F) { - LandingPadInst *LP = BB.getLandingPadInst(); - if (!LP) - continue; - EHVals.insert(LP); - for (User *U : LP->users()) { - auto *EI = dyn_cast<ExtractValueInst>(U); - if (!EI) - continue; - EHVals.insert(EI); - for (User *U2 : EI->users()) { - if (auto *PN = dyn_cast<PHINode>(U2)) - EHVals.insert(PN); - } + auto &BBColors = BlockColors[&BB]; + assert(BBColors.size() == 1 && "multi-color BB not removed by preparation"); + BasicBlock *FuncletEntryBB = BBColors.front(); + + BasicBlock *FuncletUnwindDest; + auto *FuncletPad = + dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI()); + assert(FuncletPad || FuncletEntryBB == &Fn->getEntryBlock()); + if (!FuncletPad) + FuncletUnwindDest = nullptr; + else if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad)) + FuncletUnwindDest = CatchPad->getCatchSwitch()->getUnwindDest(); + else if (auto *CleanupPad = dyn_cast<CleanupPadInst>(FuncletPad)) + FuncletUnwindDest = getCleanupRetUnwindDest(CleanupPad); + else + llvm_unreachable("unexpected funclet pad!"); + + BasicBlock *InvokeUnwindDest = II->getUnwindDest(); + int BaseState = -1; + if (FuncletUnwindDest == InvokeUnwindDest) { + auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad); + if (BaseStateI != FuncInfo.FuncletBaseStateMap.end()) + BaseState = BaseStateI->second; } - } - SetVector<Argument *> ArgsToDemote; - SetVector<Instruction *> InstrsToDemote; - for (BasicBlock &BB : F) { - bool IsNormalBB = NormalBlocks.count(&BB); - bool IsEHBB = EHBlocks.count(&BB); - if (!IsNormalBB && !IsEHBB) - continue; // Blocks that are neither normal nor EH are unreachable. - for (Instruction &I : BB) { - for (Value *Op : I.operands()) { - // Don't demote static allocas, constants, and labels. - if (isa<Constant>(Op) || isa<BasicBlock>(Op) || isa<InlineAsm>(Op)) - continue; - auto *AI = dyn_cast<AllocaInst>(Op); - if (AI && AI->isStaticAlloca()) - continue; - - if (auto *Arg = dyn_cast<Argument>(Op)) { - if (IsEHBB) { - DEBUG(dbgs() << "Demoting argument " << *Arg - << " used by EH instr: " << I << "\n"); - ArgsToDemote.insert(Arg); - } - continue; - } - - // Don't demote EH values. - auto *OpI = cast<Instruction>(Op); - if (EHVals.count(OpI)) - continue; - - BasicBlock *OpBB = OpI->getParent(); - // If a value is produced and consumed in the same BB, we don't need to - // demote it. - if (OpBB == &BB) - continue; - bool IsOpNormalBB = NormalBlocks.count(OpBB); - bool IsOpEHBB = EHBlocks.count(OpBB); - if (IsNormalBB != IsOpNormalBB || IsEHBB != IsOpEHBB) { - DEBUG({ - dbgs() << "Demoting instruction live in-out from EH:\n"; - dbgs() << "Instr: " << *OpI << '\n'; - dbgs() << "User: " << I << '\n'; - }); - InstrsToDemote.insert(OpI); - } - } - } - } - - // Demote values live into and out of handlers. - // FIXME: This demotion is inefficient. We should insert spills at the point - // of definition, insert one reload in each handler that uses the value, and - // insert reloads in the BB used to rejoin normal control flow. - Instruction *AllocaInsertPt = F.getEntryBlock().getFirstInsertionPt(); - for (Instruction *I : InstrsToDemote) - DemoteRegToStack(*I, false, AllocaInsertPt); - - // Demote arguments separately, and only for uses in EH blocks. 
- for (Argument *Arg : ArgsToDemote) { - auto *Slot = new AllocaInst(Arg->getType(), nullptr, - Arg->getName() + ".reg2mem", AllocaInsertPt); - SmallVector<User *, 4> Users(Arg->user_begin(), Arg->user_end()); - for (User *U : Users) { - auto *I = dyn_cast<Instruction>(U); - if (I && EHBlocks.count(I->getParent())) { - auto *Reload = new LoadInst(Slot, Arg->getName() + ".reload", false, I); - U->replaceUsesOfWith(Arg, Reload); - } + if (BaseState != -1) { + FuncInfo.InvokeStateMap[II] = BaseState; + } else { + Instruction *PadInst = InvokeUnwindDest->getFirstNonPHI(); + assert(FuncInfo.EHPadStateMap.count(PadInst) && "EH Pad has no state!"); + FuncInfo.InvokeStateMap[II] = FuncInfo.EHPadStateMap[PadInst]; } - new StoreInst(Arg, Slot, AllocaInsertPt); - } - - // Demote landingpad phis, as the landingpad will be removed from the machine - // CFG. - for (LandingPadInst *LPI : LPads) { - BasicBlock *BB = LPI->getParent(); - while (auto *Phi = dyn_cast<PHINode>(BB->begin())) - DemotePHIToStack(Phi, AllocaInsertPt); } - - DEBUG(dbgs() << "Demoted " << InstrsToDemote.size() << " instructions and " - << ArgsToDemote.size() << " arguments for WinEHPrepare\n\n"); } -bool WinEHPrepare::prepareExceptionHandlers( - Function &F, SmallVectorImpl<LandingPadInst *> &LPads) { - // Don't run on functions that are already prepared. - for (LandingPadInst *LPad : LPads) { - BasicBlock *LPadBB = LPad->getParent(); - for (Instruction &Inst : *LPadBB) - if (match(&Inst, m_Intrinsic<Intrinsic::eh_actions>())) - return false; - } - - identifyEHBlocks(F, LPads); - demoteValuesLiveAcrossHandlers(F, LPads); - - // These containers are used to re-map frame variables that are used in - // outlined catch and cleanup handlers. They will be populated as the - // handlers are outlined. - FrameVarInfoMap FrameVarInfo; - - bool HandlersOutlined = false; - - Module *M = F.getParent(); - LLVMContext &Context = M->getContext(); - - // Create a new function to receive the handler contents. - PointerType *Int8PtrType = Type::getInt8PtrTy(Context); - Type *Int32Type = Type::getInt32Ty(Context); - Function *ActionIntrin = Intrinsic::getDeclaration(M, Intrinsic::eh_actions); - - if (isAsynchronousEHPersonality(Personality)) { - // FIXME: Switch the ehptr type to i32 and then switch this. - SEHExceptionCodeSlot = - new AllocaInst(Int8PtrType, nullptr, "seh_exception_code", - F.getEntryBlock().getFirstInsertionPt()); +// Given BB which ends in an unwind edge, return the EHPad that this BB belongs +// to. If the unwind edge came from an invoke, return null. +static const BasicBlock *getEHPadFromPredecessor(const BasicBlock *BB, + Value *ParentPad) { + const TerminatorInst *TI = BB->getTerminator(); + if (isa<InvokeInst>(TI)) + return nullptr; + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) { + if (CatchSwitch->getParentPad() != ParentPad) + return nullptr; + return BB; } + assert(!TI->isEHPad() && "unexpected EHPad!"); + auto *CleanupPad = cast<CleanupReturnInst>(TI)->getCleanupPad(); + if (CleanupPad->getParentPad() != ParentPad) + return nullptr; + return CleanupPad->getParent(); +} - // In order to handle the case where one outlined catch handler returns - // to a block within another outlined catch handler that would otherwise - // be unreachable, we need to outline the nested landing pad before we - // outline the landing pad which encloses it. 
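calculateStateNumbersForInvokes above records, for each invoke, the state of the EH pad it unwinds to, except when the invoke unwinds to the same destination as its enclosing funclet, in which case it takes the funclet's base state. A small stand-in model of that decision (the names and maps here are illustrative, not LLVM's):

#include <iostream>
#include <map>
#include <string>

struct Invoke {
  std::string Funclet;     // funclet ("color") of the block containing the invoke
  std::string UnwindDest;  // EH pad the invoke unwinds to
};

// Pick the state for an invoke: the enclosing funclet's base state when the
// invoke unwinds where the funclet itself unwinds, otherwise the state of the
// target EH pad.
int invokeState(const Invoke &I,
                const std::map<std::string, std::string> &FuncletUnwindDest,
                const std::map<std::string, int> &FuncletBaseState,
                const std::map<std::string, int> &EHPadState) {
  auto FU = FuncletUnwindDest.find(I.Funclet);
  if (FU != FuncletUnwindDest.end() && FU->second == I.UnwindDest) {
    auto BS = FuncletBaseState.find(I.Funclet);
    if (BS != FuncletBaseState.end())
      return BS->second;               // adds no new scope inside the funclet
  }
  return EHPadState.at(I.UnwindDest);  // state of the pad being unwound to
}

int main() {
  std::map<std::string, std::string> FUD = {{"catch.funclet", "outer.pad"}};
  std::map<std::string, int> Base = {{"catch.funclet", 1}};
  std::map<std::string, int> Pads = {{"outer.pad", 0}, {"inner.pad", 2}};
  std::cout << invokeState({"catch.funclet", "outer.pad"}, FUD, Base, Pads) << '\n';  // 1
  std::cout << invokeState({"catch.funclet", "inner.pad"}, FUD, Base, Pads) << '\n';  // 2
}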
- if (!isAsynchronousEHPersonality(Personality)) - std::sort(LPads.begin(), LPads.end(), - [this](LandingPadInst *const &L, LandingPadInst *const &R) { - return DT->properlyDominates(R->getParent(), L->getParent()); - }); - - // This container stores the llvm.eh.recover and IndirectBr instructions - // that make up the body of each landing pad after it has been outlined. - // We need to defer the population of the target list for the indirectbr - // until all landing pads have been outlined so that we can handle the - // case of blocks in the target that are reached only from nested - // landing pads. - SmallVector<std::pair<CallInst*, IndirectBrInst *>, 4> LPadImpls; - - for (LandingPadInst *LPad : LPads) { - // Look for evidence that this landingpad has already been processed. - bool LPadHasActionList = false; - BasicBlock *LPadBB = LPad->getParent(); - for (Instruction &Inst : *LPadBB) { - if (match(&Inst, m_Intrinsic<Intrinsic::eh_actions>())) { - LPadHasActionList = true; - break; - } - } - - // If we've already outlined the handlers for this landingpad, - // there's nothing more to do here. - if (LPadHasActionList) - continue; - - // If either of the values in the aggregate returned by the landing pad is - // extracted and stored to memory, promote the stored value to a register. - promoteLandingPadValues(LPad); - - LandingPadActions Actions; - mapLandingPadBlocks(LPad, Actions); - - HandlersOutlined |= !Actions.actions().empty(); - for (ActionHandler *Action : Actions) { - if (Action->hasBeenProcessed()) - continue; - BasicBlock *StartBB = Action->getStartBlock(); - - // SEH doesn't do any outlining for catches. Instead, pass the handler - // basic block addr to llvm.eh.actions and list the block as a return - // target. - if (isAsynchronousEHPersonality(Personality)) { - if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { - processSEHCatchHandler(CatchAction, StartBB); - continue; - } - } - - outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo); - } - - // Split the block after the landingpad instruction so that it is just a - // call to llvm.eh.actions followed by indirectbr. - assert(!isa<PHINode>(LPadBB->begin()) && "lpad phi not removed"); - SplitBlock(LPadBB, LPad->getNextNode(), DT); - // Erase the branch inserted by the split so we can insert indirectbr. - LPadBB->getTerminator()->eraseFromParent(); - - // Replace all extracted values with undef and ultimately replace the - // landingpad with undef. - SmallVector<Instruction *, 4> SEHCodeUses; - SmallVector<Instruction *, 4> EHUndefs; - for (User *U : LPad->users()) { - auto *E = dyn_cast<ExtractValueInst>(U); - if (!E) - continue; - assert(E->getNumIndices() == 1 && - "Unexpected operation: extracting both landing pad values"); - unsigned Idx = *E->idx_begin(); - assert((Idx == 0 || Idx == 1) && "unexpected index"); - if (Idx == 0 && isAsynchronousEHPersonality(Personality)) - SEHCodeUses.push_back(E); - else - EHUndefs.push_back(E); - } - for (Instruction *E : EHUndefs) { - E->replaceAllUsesWith(UndefValue::get(E->getType())); - E->eraseFromParent(); - } - LPad->replaceAllUsesWith(UndefValue::get(LPad->getType())); - - // Rewrite uses of the exception pointer to loads of an alloca. 
- while (!SEHCodeUses.empty()) { - Instruction *E = SEHCodeUses.pop_back_val(); - SmallVector<Use *, 4> Uses; - for (Use &U : E->uses()) - Uses.push_back(&U); - for (Use *U : Uses) { - auto *I = cast<Instruction>(U->getUser()); - if (isa<ResumeInst>(I)) - continue; - if (auto *Phi = dyn_cast<PHINode>(I)) - SEHCodeUses.push_back(Phi); - else - U->set(new LoadInst(SEHExceptionCodeSlot, "sehcode", false, I)); - } - E->replaceAllUsesWith(UndefValue::get(E->getType())); - E->eraseFromParent(); - } - - // Add a call to describe the actions for this landing pad. - std::vector<Value *> ActionArgs; - for (ActionHandler *Action : Actions) { - // Action codes from docs are: 0 cleanup, 1 catch. - if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { - ActionArgs.push_back(ConstantInt::get(Int32Type, 1)); - ActionArgs.push_back(CatchAction->getSelector()); - // Find the frame escape index of the exception object alloca in the - // parent. - int FrameEscapeIdx = -1; - Value *EHObj = const_cast<Value *>(CatchAction->getExceptionVar()); - if (EHObj && !isa<ConstantPointerNull>(EHObj)) { - auto I = FrameVarInfo.find(EHObj); - assert(I != FrameVarInfo.end() && - "failed to map llvm.eh.begincatch var"); - FrameEscapeIdx = std::distance(FrameVarInfo.begin(), I); - } - ActionArgs.push_back(ConstantInt::get(Int32Type, FrameEscapeIdx)); - } else { - ActionArgs.push_back(ConstantInt::get(Int32Type, 0)); - } - ActionArgs.push_back(Action->getHandlerBlockOrFunc()); - } - CallInst *Recover = - CallInst::Create(ActionIntrin, ActionArgs, "recover", LPadBB); - - SetVector<BasicBlock *> ReturnTargets; - for (ActionHandler *Action : Actions) { - if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { - const auto &CatchTargets = CatchAction->getReturnTargets(); - ReturnTargets.insert(CatchTargets.begin(), CatchTargets.end()); - } - } - IndirectBrInst *Branch = - IndirectBrInst::Create(Recover, ReturnTargets.size(), LPadBB); - for (BasicBlock *Target : ReturnTargets) - Branch->addDestination(Target); - - if (!isAsynchronousEHPersonality(Personality)) { - // C++ EH must repopulate the targets later to handle the case of - // targets that are reached indirectly through nested landing pads. - LPadImpls.push_back(std::make_pair(Recover, Branch)); - } - - } // End for each landingpad +static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo, + const Instruction *FirstNonPHI, + int ParentState) { + const BasicBlock *BB = FirstNonPHI->getParent(); + assert(BB->isEHPad() && "not a funclet!"); - // If nothing got outlined, there is no more processing to be done. - if (!HandlersOutlined) - return false; + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FirstNonPHI)) { + assert(FuncInfo.EHPadStateMap.count(CatchSwitch) == 0 && + "shouldn't revist catch funclets!"); - // Replace any nested landing pad stubs with the correct action handler. - // This must be done before we remove unreachable blocks because it - // cleans up references to outlined blocks that will be deleted. - for (auto &LPadPair : NestedLPtoOriginalLP) - completeNestedLandingPad(&F, LPadPair.first, LPadPair.second, FrameVarInfo); - NestedLPtoOriginalLP.clear(); - - // Update the indirectbr instructions' target lists if necessary. 
- SetVector<BasicBlock*> CheckedTargets; - SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; - for (auto &LPadImplPair : LPadImpls) { - IntrinsicInst *Recover = cast<IntrinsicInst>(LPadImplPair.first); - IndirectBrInst *Branch = LPadImplPair.second; - - // Get a list of handlers called by - parseEHActions(Recover, ActionList); - - // Add an indirect branch listing possible successors of the catch handlers. - SetVector<BasicBlock *> ReturnTargets; - for (const auto &Action : ActionList) { - if (auto *CA = dyn_cast<CatchHandler>(Action.get())) { - Function *Handler = cast<Function>(CA->getHandlerBlockOrFunc()); - getPossibleReturnTargets(&F, Handler, ReturnTargets); - } - } - ActionList.clear(); - // Clear any targets we already knew about. - for (unsigned int I = 0, E = Branch->getNumDestinations(); I < E; ++I) { - BasicBlock *KnownTarget = Branch->getDestination(I); - if (ReturnTargets.count(KnownTarget)) - ReturnTargets.remove(KnownTarget); + SmallVector<const CatchPadInst *, 2> Handlers; + for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { + auto *CatchPad = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI()); + Handlers.push_back(CatchPad); } - for (BasicBlock *Target : ReturnTargets) { - Branch->addDestination(Target); - // The target may be a block that we excepted to get pruned. - // If it is, it may contain a call to llvm.eh.endcatch. - if (CheckedTargets.insert(Target)) { - // Earlier preparations guarantee that all calls to llvm.eh.endcatch - // will be followed by an unconditional branch. - auto *Br = dyn_cast<BranchInst>(Target->getTerminator()); - if (Br && Br->isUnconditional() && - Br != Target->getFirstNonPHIOrDbgOrLifetime()) { - Instruction *Prev = Br->getPrevNode(); - if (match(cast<Value>(Prev), m_Intrinsic<Intrinsic::eh_endcatch>())) - Prev->eraseFromParent(); - } + int TryLow = addUnwindMapEntry(FuncInfo, ParentState, nullptr); + FuncInfo.EHPadStateMap[CatchSwitch] = TryLow; + for (const BasicBlock *PredBlock : predecessors(BB)) + if ((PredBlock = getEHPadFromPredecessor(PredBlock, + CatchSwitch->getParentPad()))) + calculateCXXStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(), + TryLow); + int CatchLow = addUnwindMapEntry(FuncInfo, ParentState, nullptr); + + // catchpads are separate funclets in C++ EH due to the way rethrow works. + int TryHigh = CatchLow - 1; + for (const auto *CatchPad : Handlers) { + FuncInfo.FuncletBaseStateMap[CatchPad] = CatchLow; + for (const User *U : CatchPad->users()) { + const auto *UserI = cast<Instruction>(U); + if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) + if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest()) + calculateCXXStateNumbers(FuncInfo, UserI, CatchLow); + if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) + if (getCleanupRetUnwindDest(InnerCleanupPad) == + CatchSwitch->getUnwindDest()) + calculateCXXStateNumbers(FuncInfo, UserI, CatchLow); } } - } - LPadImpls.clear(); - - F.addFnAttr("wineh-parent", F.getName()); - - // Delete any blocks that were only used by handlers that were outlined above. 
- removeUnreachableBlocks(F); + int CatchHigh = FuncInfo.getLastStateNumber(); + addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchHigh, Handlers); + DEBUG(dbgs() << "TryLow[" << BB->getName() << "]: " << TryLow << '\n'); + DEBUG(dbgs() << "TryHigh[" << BB->getName() << "]: " << TryHigh << '\n'); + DEBUG(dbgs() << "CatchHigh[" << BB->getName() << "]: " << CatchHigh + << '\n'); + } else { + auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI); - BasicBlock *Entry = &F.getEntryBlock(); - IRBuilder<> Builder(F.getParent()->getContext()); - Builder.SetInsertPoint(Entry->getFirstInsertionPt()); - - Function *FrameEscapeFn = - Intrinsic::getDeclaration(M, Intrinsic::localescape); - Function *RecoverFrameFn = - Intrinsic::getDeclaration(M, Intrinsic::localrecover); - SmallVector<Value *, 8> AllocasToEscape; - - // Scan the entry block for an existing call to llvm.localescape. We need to - // keep escaping those objects. - for (Instruction &I : F.front()) { - auto *II = dyn_cast<IntrinsicInst>(&I); - if (II && II->getIntrinsicID() == Intrinsic::localescape) { - auto Args = II->arg_operands(); - AllocasToEscape.append(Args.begin(), Args.end()); - II->eraseFromParent(); - break; - } - } + // It's possible for a cleanup to be visited twice: it might have multiple + // cleanupret instructions. + if (FuncInfo.EHPadStateMap.count(CleanupPad)) + return; - // Finally, replace all of the temporary allocas for frame variables used in - // the outlined handlers with calls to llvm.localrecover. - for (auto &VarInfoEntry : FrameVarInfo) { - Value *ParentVal = VarInfoEntry.first; - TinyPtrVector<AllocaInst *> &Allocas = VarInfoEntry.second; - AllocaInst *ParentAlloca = cast<AllocaInst>(ParentVal); - - // FIXME: We should try to sink unescaped allocas from the parent frame into - // the child frame. If the alloca is escaped, we have to use the lifetime - // markers to ensure that the alloca is only live within the child frame. - - // Add this alloca to the list of things to escape. - AllocasToEscape.push_back(ParentAlloca); - - // Next replace all outlined allocas that are mapped to it. - for (AllocaInst *TempAlloca : Allocas) { - if (TempAlloca == getCatchObjectSentinel()) - continue; // Skip catch parameter sentinels. - Function *HandlerFn = TempAlloca->getParent()->getParent(); - llvm::Value *FP = HandlerToParentFP[HandlerFn]; - assert(FP); - - // FIXME: Sink this localrecover into the blocks where it is used. - Builder.SetInsertPoint(TempAlloca); - Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc()); - Value *RecoverArgs[] = { - Builder.CreateBitCast(&F, Int8PtrType, ""), FP, - llvm::ConstantInt::get(Int32Type, AllocasToEscape.size() - 1)}; - Instruction *RecoveredAlloca = - Builder.CreateCall(RecoverFrameFn, RecoverArgs); - - // Add a pointer bitcast if the alloca wasn't an i8. 
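In the new calculateCXXStateNumbers, each catchswitch reserves one state for its try body and later, higher states for its catch funclets, and the resulting [TryLow, TryHigh] plus CatchHigh bounds go into the try-block map. A stand-in sketch of how those numbers fall out for a single top-level try/catch, using simplified containers rather than WinEHFuncInfo:

#include <iostream>
#include <string>
#include <vector>

// Simplified stand-ins for the unwind-map and try-block-map entries.
struct UnwindEntry { int ToState; std::string Cleanup; };
struct TryBlockEntry { int TryLow, TryHigh, CatchHigh; };

// Appending an unwind entry allocates the next state number.
int addUnwindEntry(std::vector<UnwindEntry> &Map, int ToState,
                   const std::string &Cleanup) {
  Map.push_back({ToState, Cleanup});
  return static_cast<int>(Map.size()) - 1;
}

int main() {
  std::vector<UnwindEntry> UnwindMap;
  std::vector<TryBlockEntry> TryBlocks;

  // One top-level try/catch (parent state -1): the try body gets TryLow, the
  // catch funclet gets CatchLow, and TryHigh sits just below the catch states.
  int TryLow    = addUnwindEntry(UnwindMap, -1, "");
  int CatchLow  = addUnwindEntry(UnwindMap, -1, "");
  int TryHigh   = CatchLow - 1;
  int CatchHigh = static_cast<int>(UnwindMap.size()) - 1;
  TryBlocks.push_back({TryLow, TryHigh, CatchHigh});

  std::cout << "TryLow=" << TryLow << " TryHigh=" << TryHigh
            << " CatchHigh=" << CatchHigh << '\n';  // TryLow=0 TryHigh=0 CatchHigh=1
}

Nested handlers repeat the same scheme with the enclosing state as the parent, so every state chains back toward -1.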
- if (RecoveredAlloca->getType() != TempAlloca->getType()) { - RecoveredAlloca->setName(Twine(TempAlloca->getName()) + ".i8"); - RecoveredAlloca = cast<Instruction>( - Builder.CreateBitCast(RecoveredAlloca, TempAlloca->getType())); + int CleanupState = addUnwindMapEntry(FuncInfo, ParentState, BB); + FuncInfo.EHPadStateMap[CleanupPad] = CleanupState; + DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB " + << BB->getName() << '\n'); + for (const BasicBlock *PredBlock : predecessors(BB)) { + if ((PredBlock = getEHPadFromPredecessor(PredBlock, + CleanupPad->getParentPad()))) { + calculateCXXStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(), + CleanupState); } - TempAlloca->replaceAllUsesWith(RecoveredAlloca); - TempAlloca->removeFromParent(); - RecoveredAlloca->takeName(TempAlloca); - delete TempAlloca; } - } // End for each FrameVarInfo entry. - - // Insert 'call void (...)* @llvm.localescape(...)' at the end of the entry - // block. - Builder.SetInsertPoint(&F.getEntryBlock().back()); - Builder.CreateCall(FrameEscapeFn, AllocasToEscape); - - if (SEHExceptionCodeSlot) { - if (isAllocaPromotable(SEHExceptionCodeSlot)) { - SmallPtrSet<BasicBlock *, 4> UserBlocks; - for (User *U : SEHExceptionCodeSlot->users()) { - if (auto *Inst = dyn_cast<Instruction>(U)) - UserBlocks.insert(Inst->getParent()); - } - PromoteMemToReg(SEHExceptionCodeSlot, *DT); - // After the promotion, kill off dead instructions. - for (BasicBlock *BB : UserBlocks) - SimplifyInstructionsInBlock(BB, LibInfo); + for (const User *U : CleanupPad->users()) { + const auto *UserI = cast<Instruction>(U); + if (UserI->isEHPad()) + report_fatal_error("Cleanup funclets for the MSVC++ personality cannot " + "contain exceptional actions"); } } +} - // Clean up the handler action maps we created for this function - DeleteContainerSeconds(CatchHandlerMap); - CatchHandlerMap.clear(); - DeleteContainerSeconds(CleanupHandlerMap); - CleanupHandlerMap.clear(); - HandlerToParentFP.clear(); - DT = nullptr; - LibInfo = nullptr; - SEHExceptionCodeSlot = nullptr; - EHBlocks.clear(); - NormalBlocks.clear(); - EHReturnBlocks.clear(); - - return HandlersOutlined; +static int addSEHExcept(WinEHFuncInfo &FuncInfo, int ParentState, + const Function *Filter, const BasicBlock *Handler) { + SEHUnwindMapEntry Entry; + Entry.ToState = ParentState; + Entry.IsFinally = false; + Entry.Filter = Filter; + Entry.Handler = Handler; + FuncInfo.SEHUnwindMap.push_back(Entry); + return FuncInfo.SEHUnwindMap.size() - 1; } -void WinEHPrepare::promoteLandingPadValues(LandingPadInst *LPad) { - // If the return values of the landing pad instruction are extracted and - // stored to memory, we want to promote the store locations to reg values. - SmallVector<AllocaInst *, 2> EHAllocas; - - // The landingpad instruction returns an aggregate value. Typically, its - // value will be passed to a pair of extract value instructions and the - // results of those extracts are often passed to store instructions. - // In unoptimized code the stored value will often be loaded and then stored - // again. 
- for (auto *U : LPad->users()) { - ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U); - if (!Extract) - continue; +static int addSEHFinally(WinEHFuncInfo &FuncInfo, int ParentState, + const BasicBlock *Handler) { + SEHUnwindMapEntry Entry; + Entry.ToState = ParentState; + Entry.IsFinally = true; + Entry.Filter = nullptr; + Entry.Handler = Handler; + FuncInfo.SEHUnwindMap.push_back(Entry); + return FuncInfo.SEHUnwindMap.size() - 1; +} - for (auto *EU : Extract->users()) { - if (auto *Store = dyn_cast<StoreInst>(EU)) { - auto *AV = cast<AllocaInst>(Store->getPointerOperand()); - EHAllocas.push_back(AV); - } +static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo, + const Instruction *FirstNonPHI, + int ParentState) { + const BasicBlock *BB = FirstNonPHI->getParent(); + assert(BB->isEHPad() && "no a funclet!"); + + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FirstNonPHI)) { + assert(FuncInfo.EHPadStateMap.count(CatchSwitch) == 0 && + "shouldn't revist catch funclets!"); + + // Extract the filter function and the __except basic block and create a + // state for them. + assert(CatchSwitch->getNumHandlers() == 1 && + "SEH doesn't have multiple handlers per __try"); + const auto *CatchPad = + cast<CatchPadInst>((*CatchSwitch->handler_begin())->getFirstNonPHI()); + const BasicBlock *CatchPadBB = CatchPad->getParent(); + const Constant *FilterOrNull = + cast<Constant>(CatchPad->getArgOperand(0)->stripPointerCasts()); + const Function *Filter = dyn_cast<Function>(FilterOrNull); + assert((Filter || FilterOrNull->isNullValue()) && + "unexpected filter value"); + int TryState = addSEHExcept(FuncInfo, ParentState, Filter, CatchPadBB); + + // Everything in the __try block uses TryState as its parent state. + FuncInfo.EHPadStateMap[CatchSwitch] = TryState; + DEBUG(dbgs() << "Assigning state #" << TryState << " to BB " + << CatchPadBB->getName() << '\n'); + for (const BasicBlock *PredBlock : predecessors(BB)) + if ((PredBlock = getEHPadFromPredecessor(PredBlock, + CatchSwitch->getParentPad()))) + calculateSEHStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(), + TryState); + + // Everything in the __except block unwinds to ParentState, just like code + // outside the __try. + for (const User *U : CatchPad->users()) { + const auto *UserI = cast<Instruction>(U); + if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) + if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest()) + calculateSEHStateNumbers(FuncInfo, UserI, ParentState); + if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) + if (getCleanupRetUnwindDest(InnerCleanupPad) == + CatchSwitch->getUnwindDest()) + calculateSEHStateNumbers(FuncInfo, UserI, ParentState); } - } + } else { + auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI); - // We can't do this without a dominator tree. - assert(DT); + // It's possible for a cleanup to be visited twice: it might have multiple + // cleanupret instructions. 
+ if (FuncInfo.EHPadStateMap.count(CleanupPad)) + return; - if (!EHAllocas.empty()) { - PromoteMemToReg(EHAllocas, *DT); - EHAllocas.clear(); + int CleanupState = addSEHFinally(FuncInfo, ParentState, BB); + FuncInfo.EHPadStateMap[CleanupPad] = CleanupState; + DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB " + << BB->getName() << '\n'); + for (const BasicBlock *PredBlock : predecessors(BB)) + if ((PredBlock = + getEHPadFromPredecessor(PredBlock, CleanupPad->getParentPad()))) + calculateSEHStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(), + CleanupState); + for (const User *U : CleanupPad->users()) { + const auto *UserI = cast<Instruction>(U); + if (UserI->isEHPad()) + report_fatal_error("Cleanup funclets for the SEH personality cannot " + "contain exceptional actions"); + } } +} - // After promotion, some extracts may be trivially dead. Remove them. - SmallVector<Value *, 4> Users(LPad->user_begin(), LPad->user_end()); - for (auto *U : Users) - RecursivelyDeleteTriviallyDeadInstructions(U); +static bool isTopLevelPadForMSVC(const Instruction *EHPad) { + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(EHPad)) + return isa<ConstantTokenNone>(CatchSwitch->getParentPad()) && + CatchSwitch->unwindsToCaller(); + if (auto *CleanupPad = dyn_cast<CleanupPadInst>(EHPad)) + return isa<ConstantTokenNone>(CleanupPad->getParentPad()) && + getCleanupRetUnwindDest(CleanupPad) == nullptr; + if (isa<CatchPadInst>(EHPad)) + return false; + llvm_unreachable("unexpected EHPad!"); } -void WinEHPrepare::getPossibleReturnTargets(Function *ParentF, - Function *HandlerF, - SetVector<BasicBlock*> &Targets) { - for (BasicBlock &BB : *HandlerF) { - // If the handler contains landing pads, check for any - // handlers that may return directly to a block in the - // parent function. - if (auto *LPI = BB.getLandingPadInst()) { - IntrinsicInst *Recover = cast<IntrinsicInst>(LPI->getNextNode()); - SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; - parseEHActions(Recover, ActionList); - for (const auto &Action : ActionList) { - if (auto *CH = dyn_cast<CatchHandler>(Action.get())) { - Function *NestedF = cast<Function>(CH->getHandlerBlockOrFunc()); - getPossibleReturnTargets(ParentF, NestedF, Targets); - } - } - } +void llvm::calculateSEHStateNumbers(const Function *Fn, + WinEHFuncInfo &FuncInfo) { + // Don't compute state numbers twice. + if (!FuncInfo.SEHUnwindMap.empty()) + return; - auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()); - if (!Ret) + for (const BasicBlock &BB : *Fn) { + if (!BB.isEHPad()) continue; - - // Handler functions must always return a block address. - BlockAddress *BA = cast<BlockAddress>(Ret->getReturnValue()); - - // If this is the handler for a nested landing pad, the - // return address may have been remapped to a block in the - // parent handler. We're not interested in those. - if (BA->getFunction() != ParentF) + const Instruction *FirstNonPHI = BB.getFirstNonPHI(); + if (!isTopLevelPadForMSVC(FirstNonPHI)) continue; - - Targets.insert(BA->getBasicBlock()); + ::calculateSEHStateNumbers(FuncInfo, FirstNonPHI, -1); } + + calculateStateNumbersForInvokes(Fn, FuncInfo); } -void WinEHPrepare::completeNestedLandingPad(Function *ParentFn, - LandingPadInst *OutlinedLPad, - const LandingPadInst *OriginalLPad, - FrameVarInfoMap &FrameVarInfo) { - // Get the nested block and erase the unreachable instruction that was - // temporarily inserted as its terminator. 
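For SEH, the map built above holds only two kinds of entries: an __except entry carrying a filter plus its handler block (addSEHExcept) and a __finally entry carrying just a cleanup block (addSEHFinally), each chained to its parent state. A small stand-in model of that layout (illustrative names, not the real SEHUnwindMapEntry):

#include <iostream>
#include <string>
#include <vector>

// Stand-in for an SEH unwind-map entry: either an __except (filter + handler)
// or a __finally (cleanup handler only), linked to its parent via ToState.
struct SEHEntry {
  int ToState;
  bool IsFinally;
  std::string Filter;   // empty for __finally (or for a catch-all __except)
  std::string Handler;
};

int addExcept(std::vector<SEHEntry> &Map, int Parent,
              const std::string &Filter, const std::string &Handler) {
  Map.push_back({Parent, /*IsFinally=*/false, Filter, Handler});
  return static_cast<int>(Map.size()) - 1;
}

int addFinally(std::vector<SEHEntry> &Map, int Parent,
               const std::string &Handler) {
  Map.push_back({Parent, /*IsFinally=*/true, "", Handler});
  return static_cast<int>(Map.size()) - 1;
}

int main() {
  std::vector<SEHEntry> Map;
  // __try { __try { ... } __finally { ... } } __except (filter) { ... }
  int TryState     = addExcept(Map, -1, "filter.fn", "except.handler");
  int FinallyState = addFinally(Map, TryState, "finally.cleanup");
  std::cout << "try=" << TryState << " finally=" << FinallyState << '\n';  // try=0 finally=1
}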
- LLVMContext &Context = ParentFn->getContext(); - BasicBlock *OutlinedBB = OutlinedLPad->getParent(); - // If the nested landing pad was outlined before the landing pad that enclosed - // it, it will already be in outlined form. In that case, we just need to see - // if the returns and the enclosing branch instruction need to be updated. - IndirectBrInst *Branch = - dyn_cast<IndirectBrInst>(OutlinedBB->getTerminator()); - if (!Branch) { - // If the landing pad wasn't in outlined form, it should be a stub with - // an unreachable terminator. - assert(isa<UnreachableInst>(OutlinedBB->getTerminator())); - OutlinedBB->getTerminator()->eraseFromParent(); - // That should leave OutlinedLPad as the last instruction in its block. - assert(&OutlinedBB->back() == OutlinedLPad); - } +void llvm::calculateWinCXXEHStateNumbers(const Function *Fn, + WinEHFuncInfo &FuncInfo) { + // Return if it's already been done. + if (!FuncInfo.EHPadStateMap.empty()) + return; - // The original landing pad will have already had its action intrinsic - // built by the outlining loop. We need to clone that into the outlined - // location. It may also be necessary to add references to the exception - // variables to the outlined handler in which this landing pad is nested - // and remap return instructions in the nested handlers that should return - // to an address in the outlined handler. - Function *OutlinedHandlerFn = OutlinedBB->getParent(); - BasicBlock::const_iterator II = OriginalLPad; - ++II; - // The instruction after the landing pad should now be a call to eh.actions. - const Instruction *Recover = II; - const IntrinsicInst *EHActions = cast<IntrinsicInst>(Recover); - - // Remap the return target in the nested handler. - SmallVector<BlockAddress *, 4> ActionTargets; - SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; - parseEHActions(EHActions, ActionList); - for (const auto &Action : ActionList) { - auto *Catch = dyn_cast<CatchHandler>(Action.get()); - if (!Catch) + for (const BasicBlock &BB : *Fn) { + if (!BB.isEHPad()) continue; - // The dyn_cast to function here selects C++ catch handlers and skips - // SEH catch handlers. - auto *Handler = dyn_cast<Function>(Catch->getHandlerBlockOrFunc()); - if (!Handler) + const Instruction *FirstNonPHI = BB.getFirstNonPHI(); + if (!isTopLevelPadForMSVC(FirstNonPHI)) continue; - // Visit all the return instructions, looking for places that return - // to a location within OutlinedHandlerFn. - for (BasicBlock &NestedHandlerBB : *Handler) { - auto *Ret = dyn_cast<ReturnInst>(NestedHandlerBB.getTerminator()); - if (!Ret) - continue; - - // Handler functions must always return a block address. - BlockAddress *BA = cast<BlockAddress>(Ret->getReturnValue()); - // The original target will have been in the main parent function, - // but if it is the address of a block that has been outlined, it - // should be a block that was outlined into OutlinedHandlerFn. - assert(BA->getFunction() == ParentFn); - - // Ignore targets that aren't part of an outlined handler function. - if (!LPadTargetBlocks.count(BA->getBasicBlock())) - continue; - - // If the return value is the address ofF a block that we - // previously outlined into the parent handler function, replace - // the return instruction and add the mapped target to the list - // of possible return addresses. 
- BasicBlock *MappedBB = LPadTargetBlocks[BA->getBasicBlock()]; - assert(MappedBB->getParent() == OutlinedHandlerFn); - BlockAddress *NewBA = BlockAddress::get(OutlinedHandlerFn, MappedBB); - Ret->eraseFromParent(); - ReturnInst::Create(Context, NewBA, &NestedHandlerBB); - ActionTargets.push_back(NewBA); - } - } - ActionList.clear(); - - if (Branch) { - // If the landing pad was already in outlined form, just update its targets. - for (unsigned int I = Branch->getNumDestinations(); I > 0; --I) - Branch->removeDestination(I); - // Add the previously collected action targets. - for (auto *Target : ActionTargets) - Branch->addDestination(Target->getBasicBlock()); - } else { - // If the landing pad was previously stubbed out, fill in its outlined form. - IntrinsicInst *NewEHActions = cast<IntrinsicInst>(EHActions->clone()); - OutlinedBB->getInstList().push_back(NewEHActions); - - // Insert an indirect branch into the outlined landing pad BB. - IndirectBrInst *IBr = IndirectBrInst::Create(NewEHActions, 0, OutlinedBB); - // Add the previously collected action targets. - for (auto *Target : ActionTargets) - IBr->addDestination(Target->getBasicBlock()); - } -} - -// This function examines a block to determine whether the block ends with a -// conditional branch to a catch handler based on a selector comparison. -// This function is used both by the WinEHPrepare::findSelectorComparison() and -// WinEHCleanupDirector::handleTypeIdFor(). -static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler, - Constant *&Selector, BasicBlock *&NextBB) { - ICmpInst::Predicate Pred; - BasicBlock *TBB, *FBB; - Value *LHS, *RHS; - - if (!match(BB->getTerminator(), - m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TBB, FBB))) - return false; - - if (!match(LHS, - m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))) && - !match(RHS, m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector)))) - return false; - - if (Pred == CmpInst::ICMP_EQ) { - CatchHandler = TBB; - NextBB = FBB; - return true; - } - - if (Pred == CmpInst::ICMP_NE) { - CatchHandler = FBB; - NextBB = TBB; - return true; + calculateCXXStateNumbers(FuncInfo, FirstNonPHI, -1); } - return false; + calculateStateNumbersForInvokes(Fn, FuncInfo); } -static bool isCatchBlock(BasicBlock *BB) { - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); - II != IE; ++II) { - if (match(cast<Value>(II), m_Intrinsic<Intrinsic::eh_begincatch>())) - return true; - } - return false; -} - -static BasicBlock *createStubLandingPad(Function *Handler) { - // FIXME: Finish this! - LLVMContext &Context = Handler->getContext(); - BasicBlock *StubBB = BasicBlock::Create(Context, "stub"); - Handler->getBasicBlockList().push_back(StubBB); - IRBuilder<> Builder(StubBB); - LandingPadInst *LPad = Builder.CreateLandingPad( - llvm::StructType::get(Type::getInt8PtrTy(Context), - Type::getInt32Ty(Context), nullptr), - 0); - // Insert a call to llvm.eh.actions so that we don't try to outline this lpad. - Function *ActionIntrin = - Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::eh_actions); - Builder.CreateCall(ActionIntrin, {}, "recover"); - LPad->setCleanup(true); - Builder.CreateUnreachable(); - return StubBB; -} - -// Cycles through the blocks in an outlined handler function looking for an -// invoke instruction and inserts an invoke of llvm.donothing with an empty -// landing pad if none is found. 
The code that generates the .xdata tables for -// the handler needs at least one landing pad to identify the parent function's -// personality. -void WinEHPrepare::addStubInvokeToHandlerIfNeeded(Function *Handler) { - ReturnInst *Ret = nullptr; - UnreachableInst *Unreached = nullptr; - for (BasicBlock &BB : *Handler) { - TerminatorInst *Terminator = BB.getTerminator(); - // If we find an invoke, there is nothing to be done. - auto *II = dyn_cast<InvokeInst>(Terminator); - if (II) - return; - // If we've already recorded a return instruction, keep looking for invokes. - if (!Ret) - Ret = dyn_cast<ReturnInst>(Terminator); - // If we haven't recorded an unreachable instruction, try this terminator. - if (!Unreached) - Unreached = dyn_cast<UnreachableInst>(Terminator); - } - - // If we got this far, the handler contains no invokes. We should have seen - // at least one return or unreachable instruction. We'll insert an invoke of - // llvm.donothing ahead of that instruction. - assert(Ret || Unreached); - TerminatorInst *Term; - if (Ret) - Term = Ret; - else - Term = Unreached; - BasicBlock *OldRetBB = Term->getParent(); - BasicBlock *NewRetBB = SplitBlock(OldRetBB, Term, DT); - // SplitBlock adds an unconditional branch instruction at the end of the - // parent block. We want to replace that with an invoke call, so we can - // erase it now. - OldRetBB->getTerminator()->eraseFromParent(); - BasicBlock *StubLandingPad = createStubLandingPad(Handler); - Function *F = - Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::donothing); - InvokeInst::Create(F, NewRetBB, StubLandingPad, None, "", OldRetBB); +static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int ParentState, + ClrHandlerType HandlerType, uint32_t TypeToken, + const BasicBlock *Handler) { + ClrEHUnwindMapEntry Entry; + Entry.Parent = ParentState; + Entry.Handler = Handler; + Entry.HandlerType = HandlerType; + Entry.TypeToken = TypeToken; + FuncInfo.ClrEHUnwindMap.push_back(Entry); + return FuncInfo.ClrEHUnwindMap.size() - 1; } -// FIXME: Consider sinking this into lib/Target/X86 somehow. TargetLowering -// usually doesn't build LLVM IR, so that's probably the wrong place. -Function *WinEHPrepare::createHandlerFunc(Function *ParentFn, Type *RetTy, - const Twine &Name, Module *M, - Value *&ParentFP) { - // x64 uses a two-argument prototype where the parent FP is the second - // argument. x86 uses no arguments, just the incoming EBP value. 
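addClrEHHandler above extends the same append-and-return-index pattern with two extra pieces of information: the kind of handler (catch, fault, finally) and, for catches, the CoreCLR type token of the caught type. A hedged sketch of that entry shape follows, using invented names (ClrEntry, HandlerKind, addClrState) rather than the real ClrEHUnwindMapEntry.

    #include <cstdint>
    #include <string>
    #include <vector>

    // Hypothetical, simplified mirror of a CLR unwind-map entry: besides the
    // parent state it records what kind of handler runs and, for catches, a
    // type token identifying the caught exception type.
    enum class HandlerKind { Catch, Fault, Finally };

    struct ClrEntry {
      int Parent;          // enclosing state, -1 == unwind to caller
      HandlerKind Kind;
      uint32_t TypeToken;  // only meaningful for Kind == Catch
      std::string Handler; // handler block label (stand-in)
    };

    static int addClrState(std::vector<ClrEntry> &Map, int Parent,
                           HandlerKind Kind, uint32_t TypeToken,
                           const std::string &Handler) {
      Map.push_back({Parent, Kind, TypeToken, Handler});
      return static_cast<int>(Map.size()) - 1;
    }

    int main() {
      std::vector<ClrEntry> Map;
      int Finally = addClrState(Map, -1, HandlerKind::Finally, 0, "fin");
      // A catch nested inside the finally chains through the Parent field.
      addClrState(Map, Finally, HandlerKind::Catch, 0x0200001A, "catch.obj");
      return 0;
    }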
- LLVMContext &Context = M->getContext(); - Type *Int8PtrType = Type::getInt8PtrTy(Context); - FunctionType *FnType; - if (TheTriple.getArch() == Triple::x86_64) { - Type *ArgTys[2] = {Int8PtrType, Int8PtrType}; - FnType = FunctionType::get(RetTy, ArgTys, false); - } else { - FnType = FunctionType::get(RetTy, None, false); - } - - Function *Handler = - Function::Create(FnType, GlobalVariable::InternalLinkage, Name, M); - BasicBlock *Entry = BasicBlock::Create(Context, "entry"); - Handler->getBasicBlockList().push_front(Entry); - if (TheTriple.getArch() == Triple::x86_64) { - ParentFP = &(Handler->getArgumentList().back()); - } else { - assert(M); - Function *FrameAddressFn = - Intrinsic::getDeclaration(M, Intrinsic::frameaddress); - Function *RecoverFPFn = - Intrinsic::getDeclaration(M, Intrinsic::x86_seh_recoverfp); - IRBuilder<> Builder(&Handler->getEntryBlock()); - Value *EBP = - Builder.CreateCall(FrameAddressFn, {Builder.getInt32(1)}, "ebp"); - Value *ParentI8Fn = Builder.CreateBitCast(ParentFn, Int8PtrType); - ParentFP = Builder.CreateCall(RecoverFPFn, {ParentI8Fn, EBP}); - } - return Handler; -} +void llvm::calculateClrEHStateNumbers(const Function *Fn, + WinEHFuncInfo &FuncInfo) { + // Return if it's already been done. + if (!FuncInfo.EHPadStateMap.empty()) + return; -bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn, - LandingPadInst *LPad, BasicBlock *StartBB, - FrameVarInfoMap &VarInfo) { - Module *M = SrcFn->getParent(); - LLVMContext &Context = M->getContext(); - Type *Int8PtrType = Type::getInt8PtrTy(Context); - - // Create a new function to receive the handler contents. - Value *ParentFP; - Function *Handler; - if (Action->getType() == Catch) { - Handler = createHandlerFunc(SrcFn, Int8PtrType, SrcFn->getName() + ".catch", M, - ParentFP); - } else { - Handler = createHandlerFunc(SrcFn, Type::getVoidTy(Context), - SrcFn->getName() + ".cleanup", M, ParentFP); - } - Handler->setPersonalityFn(SrcFn->getPersonalityFn()); - HandlerToParentFP[Handler] = ParentFP; - Handler->addFnAttr("wineh-parent", SrcFn->getName()); - BasicBlock *Entry = &Handler->getEntryBlock(); - - // Generate a standard prolog to setup the frame recovery structure. - IRBuilder<> Builder(Context); - Builder.SetInsertPoint(Entry); - Builder.SetCurrentDebugLocation(LPad->getDebugLoc()); - - std::unique_ptr<WinEHCloningDirectorBase> Director; - - ValueToValueMapTy VMap; - - LandingPadMap &LPadMap = LPadMaps[LPad]; - if (!LPadMap.isInitialized()) - LPadMap.mapLandingPad(LPad); - if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { - Constant *Sel = CatchAction->getSelector(); - Director.reset(new WinEHCatchDirector(Handler, ParentFP, Sel, VarInfo, - LPadMap, NestedLPtoOriginalLP, DT, - EHBlocks)); - LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType), - ConstantInt::get(Type::getInt32Ty(Context), 1)); - } else { - Director.reset( - new WinEHCleanupDirector(Handler, ParentFP, VarInfo, LPadMap)); - LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType), - UndefValue::get(Type::getInt32Ty(Context))); - } + SmallVector<std::pair<const Instruction *, int>, 8> Worklist; - SmallVector<ReturnInst *, 8> Returns; - ClonedCodeInfo OutlinedFunctionInfo; - - // If the start block contains PHI nodes, we need to map them. - BasicBlock::iterator II = StartBB->begin(); - while (auto *PN = dyn_cast<PHINode>(II)) { - bool Mapped = false; - // Look for PHI values that we have already mapped (such as the selector). 
- for (Value *Val : PN->incoming_values()) { - if (VMap.count(Val)) { - VMap[PN] = VMap[Val]; - Mapped = true; - } - } - // If we didn't find a match for this value, map it as an undef. - if (!Mapped) { - VMap[PN] = UndefValue::get(PN->getType()); - } - ++II; + // Each pad needs to be able to refer to its parent, so scan the function + // looking for top-level handlers and seed the worklist with them. + for (const BasicBlock &BB : *Fn) { + if (!BB.isEHPad()) + continue; + if (BB.isLandingPad()) + report_fatal_error("CoreCLR EH cannot use landingpads"); + const Instruction *FirstNonPHI = BB.getFirstNonPHI(); + if (!isTopLevelPadForMSVC(FirstNonPHI)) + continue; + // queue this with sentinel parent state -1 to mean unwind to caller. + Worklist.emplace_back(FirstNonPHI, -1); } - // The landing pad value may be used by PHI nodes. It will ultimately be - // eliminated, but we need it in the map for intermediate handling. - VMap[LPad] = UndefValue::get(LPad->getType()); - - // Skip over PHIs and, if applicable, landingpad instructions. - II = StartBB->getFirstInsertionPt(); - - CloneAndPruneIntoFromInst(Handler, SrcFn, II, VMap, - /*ModuleLevelChanges=*/false, Returns, "", - &OutlinedFunctionInfo, Director.get()); - - // Move all the instructions in the cloned "entry" block into our entry block. - // Depending on how the parent function was laid out, the block that will - // correspond to the outlined entry block may not be the first block in the - // list. We can recognize it, however, as the cloned block which has no - // predecessors. Any other block wouldn't have been cloned if it didn't - // have a predecessor which was also cloned. - Function::iterator ClonedIt = std::next(Function::iterator(Entry)); - while (!pred_empty(ClonedIt)) - ++ClonedIt; - BasicBlock *ClonedEntryBB = ClonedIt; - assert(ClonedEntryBB); - Entry->getInstList().splice(Entry->end(), ClonedEntryBB->getInstList()); - ClonedEntryBB->eraseFromParent(); - - // Make sure we can identify the handler's personality later. - addStubInvokeToHandlerIfNeeded(Handler); - - if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { - WinEHCatchDirector *CatchDirector = - reinterpret_cast<WinEHCatchDirector *>(Director.get()); - CatchAction->setExceptionVar(CatchDirector->getExceptionVar()); - CatchAction->setReturnTargets(CatchDirector->getReturnTargets()); - - // Look for blocks that are not part of the landing pad that we just - // outlined but terminate with a call to llvm.eh.endcatch and a - // branch to a block that is in the handler we just outlined. - // These blocks will be part of a nested landing pad that intends to - // return to an address in this handler. This case is best handled - // after both landing pads have been outlined, so for now we'll just - // save the association of the blocks in LPadTargetBlocks. The - // return instructions which are created from these branches will be - // replaced after all landing pads have been outlined. - for (const auto MapEntry : VMap) { - // VMap maps all values and blocks that were just cloned, but dead - // blocks which were pruned will map to nullptr. - if (!isa<BasicBlock>(MapEntry.first) || MapEntry.second == nullptr) + while (!Worklist.empty()) { + const Instruction *Pad; + int ParentState; + std::tie(Pad, ParentState) = Worklist.pop_back_val(); + + Value *ParentPad; + int PredState; + if (const CleanupPadInst *Cleanup = dyn_cast<CleanupPadInst>(Pad)) { + // A cleanup can have multiple exits; don't re-process after the first. 
+ if (FuncInfo.EHPadStateMap.count(Cleanup)) continue; - const BasicBlock *MappedBB = cast<BasicBlock>(MapEntry.first); - for (auto *Pred : predecessors(const_cast<BasicBlock *>(MappedBB))) { - auto *Branch = dyn_cast<BranchInst>(Pred->getTerminator()); - if (!Branch || !Branch->isUnconditional() || Pred->size() <= 1) - continue; - BasicBlock::iterator II = const_cast<BranchInst *>(Branch); - --II; - if (match(cast<Value>(II), m_Intrinsic<Intrinsic::eh_endcatch>())) { - // This would indicate that a nested landing pad wants to return - // to a block that is outlined into two different handlers. - assert(!LPadTargetBlocks.count(MappedBB)); - LPadTargetBlocks[MappedBB] = cast<BasicBlock>(MapEntry.second); + // CoreCLR personality uses arity to distinguish faults from finallies. + const BasicBlock *PadBlock = Cleanup->getParent(); + ClrHandlerType HandlerType = + (Cleanup->getNumOperands() ? ClrHandlerType::Fault + : ClrHandlerType::Finally); + int NewState = + addClrEHHandler(FuncInfo, ParentState, HandlerType, 0, PadBlock); + FuncInfo.EHPadStateMap[Cleanup] = NewState; + // Propagate the new state to all preds of the cleanup + ParentPad = Cleanup->getParentPad(); + PredState = NewState; + } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { + SmallVector<const CatchPadInst *, 1> Handlers; + for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { + const auto *Catch = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI()); + Handlers.push_back(Catch); + } + FuncInfo.EHPadStateMap[CatchSwitch] = ParentState; + int NewState = ParentState; + for (auto HandlerI = Handlers.rbegin(), HandlerE = Handlers.rend(); + HandlerI != HandlerE; ++HandlerI) { + const CatchPadInst *Catch = *HandlerI; + const BasicBlock *PadBlock = Catch->getParent(); + uint32_t TypeToken = static_cast<uint32_t>( + cast<ConstantInt>(Catch->getArgOperand(0))->getZExtValue()); + NewState = addClrEHHandler(FuncInfo, NewState, ClrHandlerType::Catch, + TypeToken, PadBlock); + FuncInfo.EHPadStateMap[Catch] = NewState; + } + for (const auto *CatchPad : Handlers) { + for (const User *U : CatchPad->users()) { + const auto *UserI = cast<Instruction>(U); + if (UserI->isEHPad()) + Worklist.emplace_back(UserI, ParentState); } } + PredState = NewState; + ParentPad = CatchSwitch->getParentPad(); + } else { + llvm_unreachable("Unexpected EH pad"); } - } // End if (CatchAction) - - Action->setHandlerBlockOrFunc(Handler); - - return true; -} - -/// This BB must end in a selector dispatch. All we need to do is pass the -/// handler block to llvm.eh.actions and list it as a possible indirectbr -/// target. -void WinEHPrepare::processSEHCatchHandler(CatchHandler *CatchAction, - BasicBlock *StartBB) { - BasicBlock *HandlerBB; - BasicBlock *NextBB; - Constant *Selector; - bool Res = isSelectorDispatch(StartBB, HandlerBB, Selector, NextBB); - if (Res) { - // If this was EH dispatch, this must be a conditional branch to the handler - // block. - // FIXME: Handle instructions in the dispatch block. Currently we drop them, - // leading to crashes if some optimization hoists stuff here. - assert(CatchAction->getSelector() && HandlerBB && - "expected catch EH dispatch"); - } else { - // This must be a catch-all. Split the block after the landingpad. 
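The catchswitch case above walks the handler list in reverse, so each catch's state is created with the previously created state as its parent; the first handler in source order ends up with the highest state number, and each handler's parent is the state of the next handler in the list. A small standalone illustration, where strings stand in for catchpad blocks and Entry/add are invented helpers:

    #include <string>
    #include <vector>

    struct Entry { int Parent; std::string Handler; };

    static int add(std::vector<Entry> &Map, int Parent, const std::string &H) {
      Map.push_back({Parent, H});
      return static_cast<int>(Map.size()) - 1;
    }

    int main() {
      std::vector<Entry> Map;
      int ParentState = -1; // the catchswitch unwinds to the caller
      std::vector<std::string> Handlers = {"catch.int", "catch.ptr", "catch.all"};
      // Reverse iteration chains the states together.
      int NewState = ParentState;
      for (auto It = Handlers.rbegin(); It != Handlers.rend(); ++It)
        NewState = add(Map, NewState, *It);
      // Map[0] = {-1, catch.all}, Map[1] = {0, catch.ptr}, Map[2] = {1, catch.int}
      return 0;
    }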
- assert(CatchAction->getSelector()->isNullValue() && "expected catch-all"); - HandlerBB = SplitBlock(StartBB, StartBB->getFirstInsertionPt(), DT); - } - IRBuilder<> Builder(HandlerBB->getFirstInsertionPt()); - Function *EHCodeFn = Intrinsic::getDeclaration( - StartBB->getParent()->getParent(), Intrinsic::eh_exceptioncode); - Value *Code = Builder.CreateCall(EHCodeFn, {}, "sehcode"); - Code = Builder.CreateIntToPtr(Code, SEHExceptionCodeSlot->getAllocatedType()); - Builder.CreateStore(Code, SEHExceptionCodeSlot); - CatchAction->setHandlerBlockOrFunc(BlockAddress::get(HandlerBB)); - TinyPtrVector<BasicBlock *> Targets(HandlerBB); - CatchAction->setReturnTargets(Targets); -} -void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) { - // Each instance of this class should only ever be used to map a single - // landing pad. - assert(OriginLPad == nullptr || OriginLPad == LPad); - - // If the landing pad has already been mapped, there's nothing more to do. - if (OriginLPad == LPad) - return; - - OriginLPad = LPad; - - // The landingpad instruction returns an aggregate value. Typically, its - // value will be passed to a pair of extract value instructions and the - // results of those extracts will have been promoted to reg values before - // this routine is called. - for (auto *U : LPad->users()) { - const ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U); - if (!Extract) - continue; - assert(Extract->getNumIndices() == 1 && - "Unexpected operation: extracting both landing pad values"); - unsigned int Idx = *(Extract->idx_begin()); - assert((Idx == 0 || Idx == 1) && - "Unexpected operation: extracting an unknown landing pad element"); - if (Idx == 0) { - ExtractedEHPtrs.push_back(Extract); - } else if (Idx == 1) { - ExtractedSelectors.push_back(Extract); + // Queue all predecessors with the given state + for (const BasicBlock *Pred : predecessors(Pad->getParent())) { + if ((Pred = getEHPadFromPredecessor(Pred, ParentPad))) + Worklist.emplace_back(Pred->getFirstNonPHI(), PredState); } } -} -bool LandingPadMap::isOriginLandingPadBlock(const BasicBlock *BB) const { - return BB->getLandingPadInst() == OriginLPad; + calculateStateNumbersForInvokes(Fn, FuncInfo); } -bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const { - if (Inst == OriginLPad) - return true; - for (auto *Extract : ExtractedEHPtrs) { - if (Inst == Extract) - return true; - } - for (auto *Extract : ExtractedSelectors) { - if (Inst == Extract) - return true; - } - return false; -} - -void LandingPadMap::remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue, - Value *SelectorValue) const { - // Remap all landing pad extract instructions to the specified values. - for (auto *Extract : ExtractedEHPtrs) - VMap[Extract] = EHPtrValue; - for (auto *Extract : ExtractedSelectors) - VMap[Extract] = SelectorValue; -} - -static bool isLocalAddressCall(const Value *V) { - return match(const_cast<Value *>(V), m_Intrinsic<Intrinsic::localaddress>()); -} - -CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // If this is one of the boilerplate landing pad instructions, skip it. - // The instruction will have already been remapped in VMap. - if (LPadMap.isLandingPadSpecificInst(Inst)) - return CloningDirector::SkipInstruction; - - // Nested landing pads that have not already been outlined will be cloned as - // stubs, with just the landingpad instruction and an unreachable instruction. 
- // When all landingpads have been outlined, we'll replace this with the - // llvm.eh.actions call and indirect branch created when the landing pad was - // outlined. - if (auto *LPad = dyn_cast<LandingPadInst>(Inst)) { - return handleLandingPad(VMap, LPad, NewBB); - } +void WinEHPrepare::colorFunclets(Function &F) { + BlockColors = colorEHFunclets(F); - // Nested landing pads that have already been outlined will be cloned in their - // outlined form, but we need to intercept the ibr instruction to filter out - // targets that do not return to the handler we are outlining. - if (auto *IBr = dyn_cast<IndirectBrInst>(Inst)) { - return handleIndirectBr(VMap, IBr, NewBB); - } - - if (auto *Invoke = dyn_cast<InvokeInst>(Inst)) - return handleInvoke(VMap, Invoke, NewBB); - - if (auto *Resume = dyn_cast<ResumeInst>(Inst)) - return handleResume(VMap, Resume, NewBB); - - if (auto *Cmp = dyn_cast<CmpInst>(Inst)) - return handleCompare(VMap, Cmp, NewBB); - - if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>())) - return handleBeginCatch(VMap, Inst, NewBB); - if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>())) - return handleEndCatch(VMap, Inst, NewBB); - if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) - return handleTypeIdFor(VMap, Inst, NewBB); - - // When outlining llvm.localaddress(), remap that to the second argument, - // which is the FP of the parent. - if (isLocalAddressCall(Inst)) { - VMap[Inst] = ParentFP; - return CloningDirector::SkipInstruction; - } - - // Continue with the default cloning behavior. - return CloningDirector::CloneInstruction; -} - -CloningDirector::CloningAction WinEHCatchDirector::handleLandingPad( - ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) { - // If the instruction after the landing pad is a call to llvm.eh.actions - // the landing pad has already been outlined. In this case, we should - // clone it because it may return to a block in the handler we are - // outlining now that would otherwise be unreachable. The landing pads - // are sorted before outlining begins to enable this case to work - // properly. - const Instruction *NextI = LPad->getNextNode(); - if (match(NextI, m_Intrinsic<Intrinsic::eh_actions>())) - return CloningDirector::CloneInstruction; - - // If the landing pad hasn't been outlined yet, the landing pad we are - // outlining now does not dominate it and so it cannot return to a block - // in this handler. In that case, we can just insert a stub landing - // pad now and patch it up later. - Instruction *NewInst = LPad->clone(); - if (LPad->hasName()) - NewInst->setName(LPad->getName()); - // Save this correlation for later processing. - NestedLPtoOriginalLP[cast<LandingPadInst>(NewInst)] = LPad; - VMap[LPad] = NewInst; - BasicBlock::InstListType &InstList = NewBB->getInstList(); - InstList.push_back(NewInst); - InstList.push_back(new UnreachableInst(NewBB->getContext())); - return CloningDirector::StopCloningBB; -} - -CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // The argument to the call is some form of the first element of the - // landingpad aggregate value, but that doesn't matter. It isn't used - // here. - // The second argument is an outparameter where the exception object will be - // stored. Typically the exception object is a scalar, but it can be an - // aggregate when catching by value. - // FIXME: Leave something behind to indicate where the exception object lives - // for this handler. 
Should it be part of llvm.eh.actions? - assert(ExceptionObjectVar == nullptr && "Multiple calls to " - "llvm.eh.begincatch found while " - "outlining catch handler."); - ExceptionObjectVar = Inst->getOperand(1)->stripPointerCasts(); - if (isa<ConstantPointerNull>(ExceptionObjectVar)) - return CloningDirector::SkipInstruction; - assert(cast<AllocaInst>(ExceptionObjectVar)->isStaticAlloca() && - "catch parameter is not static alloca"); - Materializer.escapeCatchObject(ExceptionObjectVar); - return CloningDirector::SkipInstruction; -} - -CloningDirector::CloningAction -WinEHCatchDirector::handleEndCatch(ValueToValueMapTy &VMap, - const Instruction *Inst, BasicBlock *NewBB) { - auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst); - // It might be interesting to track whether or not we are inside a catch - // function, but that might make the algorithm more brittle than it needs - // to be. - - // The end catch call can occur in one of two places: either in a - // landingpad block that is part of the catch handlers exception mechanism, - // or at the end of the catch block. However, a catch-all handler may call - // end catch from the original landing pad. If the call occurs in a nested - // landing pad block, we must skip it and continue so that the landing pad - // gets cloned. - auto *ParentBB = IntrinCall->getParent(); - if (ParentBB->isLandingPad() && !LPadMap.isOriginLandingPadBlock(ParentBB)) - return CloningDirector::SkipInstruction; - - // If an end catch occurs anywhere else we want to terminate the handler - // with a return to the code that follows the endcatch call. If the - // next instruction is not an unconditional branch, we need to split the - // block to provide a clear target for the return instruction. - BasicBlock *ContinueBB; - auto Next = std::next(BasicBlock::const_iterator(IntrinCall)); - const BranchInst *Branch = dyn_cast<BranchInst>(Next); - if (!Branch || !Branch->isUnconditional()) { - // We're interrupting the cloning process at this location, so the - // const_cast we're doing here will not cause a problem. - ContinueBB = SplitBlock(const_cast<BasicBlock *>(ParentBB), - const_cast<Instruction *>(cast<Instruction>(Next))); - } else { - ContinueBB = Branch->getSuccessor(0); + // Invert the map from BB to colors to color to BBs. + for (BasicBlock &BB : F) { + ColorVector &Colors = BlockColors[&BB]; + for (BasicBlock *Color : Colors) + FuncletBlocks[Color].push_back(&BB); } - - ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueBB), NewBB); - ReturnTargets.push_back(ContinueBB); - - // We just added a terminator to the cloned block. - // Tell the caller to stop processing the current basic block so that - // the branch instruction will be skipped. - return CloningDirector::StopCloningBB; -} - -CloningDirector::CloningAction WinEHCatchDirector::handleTypeIdFor( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst); - Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts(); - // This causes a replacement that will collapse the landing pad CFG based - // on the filter function we intend to match. - if (Selector == CurrentSelector) - VMap[Inst] = ConstantInt::get(SelectorIDType, 1); - else - VMap[Inst] = ConstantInt::get(SelectorIDType, 0); - // Tell the caller not to clone this instruction. 
- return CloningDirector::SkipInstruction; } -CloningDirector::CloningAction WinEHCatchDirector::handleIndirectBr( - ValueToValueMapTy &VMap, - const IndirectBrInst *IBr, - BasicBlock *NewBB) { - // If this indirect branch is not part of a landing pad block, just clone it. - const BasicBlock *ParentBB = IBr->getParent(); - if (!ParentBB->isLandingPad()) - return CloningDirector::CloneInstruction; - - // If it is part of a landing pad, we want to filter out target blocks - // that are not part of the handler we are outlining. - const LandingPadInst *LPad = ParentBB->getLandingPadInst(); - - // Save this correlation for later processing. - NestedLPtoOriginalLP[cast<LandingPadInst>(VMap[LPad])] = LPad; - - // We should only get here for landing pads that have already been outlined. - assert(match(LPad->getNextNode(), m_Intrinsic<Intrinsic::eh_actions>())); - - // Copy the indirectbr, but only include targets that were previously - // identified as EH blocks and are dominated by the nested landing pad. - SetVector<const BasicBlock *> ReturnTargets; - for (int I = 0, E = IBr->getNumDestinations(); I < E; ++I) { - auto *TargetBB = IBr->getDestination(I); - if (EHBlocks.count(const_cast<BasicBlock*>(TargetBB)) && - DT->dominates(ParentBB, TargetBB)) { - DEBUG(dbgs() << " Adding destination " << TargetBB->getName() << "\n"); - ReturnTargets.insert(TargetBB); - } +void llvm::calculateCatchReturnSuccessorColors(const Function *Fn, + WinEHFuncInfo &FuncInfo) { + for (const BasicBlock &BB : *Fn) { + const auto *CatchRet = dyn_cast<CatchReturnInst>(BB.getTerminator()); + if (!CatchRet) + continue; + // A 'catchret' returns to the outer scope's color. + Value *ParentPad = CatchRet->getParentPad(); + const BasicBlock *Color; + if (isa<ConstantTokenNone>(ParentPad)) + Color = &Fn->getEntryBlock(); + else + Color = cast<Instruction>(ParentPad)->getParent(); + // Record the catchret successor's funclet membership. + FuncInfo.CatchRetSuccessorColorMap[CatchRet] = Color; } - IndirectBrInst *NewBranch = - IndirectBrInst::Create(const_cast<Value *>(IBr->getAddress()), - ReturnTargets.size(), NewBB); - for (auto *Target : ReturnTargets) - NewBranch->addDestination(const_cast<BasicBlock*>(Target)); - - // The operands and targets of the branch instruction are remapped later - // because it is a terminator. Tell the cloning code to clone the - // blocks we just added to the target list. - return CloningDirector::CloneSuccessors; } -CloningDirector::CloningAction -WinEHCatchDirector::handleInvoke(ValueToValueMapTy &VMap, - const InvokeInst *Invoke, BasicBlock *NewBB) { - return CloningDirector::CloneInstruction; -} +void WinEHPrepare::demotePHIsOnFunclets(Function &F) { + // Strip PHI nodes off of EH pads. + SmallVector<PHINode *, 16> PHINodes; + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) { + BasicBlock *BB = &*FI++; + if (!BB->isEHPad()) + continue; + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { + Instruction *I = &*BI++; + auto *PN = dyn_cast<PHINode>(I); + // Stop at the first non-PHI. + if (!PN) + break; -CloningDirector::CloningAction -WinEHCatchDirector::handleResume(ValueToValueMapTy &VMap, - const ResumeInst *Resume, BasicBlock *NewBB) { - // Resume instructions shouldn't be reachable from catch handlers. - // We still need to handle it, but it will be pruned. 
- BasicBlock::InstListType &InstList = NewBB->getInstList(); - InstList.push_back(new UnreachableInst(NewBB->getContext())); - return CloningDirector::StopCloningBB; -} + AllocaInst *SpillSlot = insertPHILoads(PN, F); + if (SpillSlot) + insertPHIStores(PN, SpillSlot); -CloningDirector::CloningAction -WinEHCatchDirector::handleCompare(ValueToValueMapTy &VMap, - const CmpInst *Compare, BasicBlock *NewBB) { - const IntrinsicInst *IntrinCall = nullptr; - if (match(Compare->getOperand(0), m_Intrinsic<Intrinsic::eh_typeid_for>())) { - IntrinCall = dyn_cast<IntrinsicInst>(Compare->getOperand(0)); - } else if (match(Compare->getOperand(1), - m_Intrinsic<Intrinsic::eh_typeid_for>())) { - IntrinCall = dyn_cast<IntrinsicInst>(Compare->getOperand(1)); - } - if (IntrinCall) { - Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts(); - // This causes a replacement that will collapse the landing pad CFG based - // on the filter function we intend to match. - if (Selector == CurrentSelector->stripPointerCasts()) { - VMap[Compare] = ConstantInt::get(SelectorIDType, 1); - } else { - VMap[Compare] = ConstantInt::get(SelectorIDType, 0); + PHINodes.push_back(PN); } - return CloningDirector::SkipInstruction; } - return CloningDirector::CloneInstruction; -} -CloningDirector::CloningAction WinEHCleanupDirector::handleLandingPad( - ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) { - // The MS runtime will terminate the process if an exception occurs in a - // cleanup handler, so we shouldn't encounter landing pads in the actual - // cleanup code, but they may appear in catch blocks. Depending on where - // we started cloning we may see one, but it will get dropped during dead - // block pruning. - Instruction *NewInst = new UnreachableInst(NewBB->getContext()); - VMap[LPad] = NewInst; - BasicBlock::InstListType &InstList = NewBB->getInstList(); - InstList.push_back(NewInst); - return CloningDirector::StopCloningBB; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // Cleanup code may flow into catch blocks or the catch block may be part - // of a branch that will be optimized away. We'll insert a return - // instruction now, but it may be pruned before the cloning process is - // complete. - ReturnInst::Create(NewBB->getContext(), nullptr, NewBB); - return CloningDirector::StopCloningBB; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleEndCatch( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // Cleanup handlers nested within catch handlers may begin with a call to - // eh.endcatch. We can just ignore that instruction. - return CloningDirector::SkipInstruction; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleTypeIdFor( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // If we encounter a selector comparison while cloning a cleanup handler, - // we want to stop cloning immediately. Anything after the dispatch - // will be outlined into a different handler. - BasicBlock *CatchHandler; - Constant *Selector; - BasicBlock *NextBB; - if (isSelectorDispatch(const_cast<BasicBlock *>(Inst->getParent()), - CatchHandler, Selector, NextBB)) { - ReturnInst::Create(NewBB->getContext(), nullptr, NewBB); - return CloningDirector::StopCloningBB; - } - // If eg.typeid.for is called for any other reason, it can be ignored. 
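demotePHIsOnFunclets above strips PHI nodes off EH pad blocks by spilling through a stack slot; insertPHILoads and insertPHIStores, defined elsewhere in this file, do the real work. The general shape is a store at the end of each predecessor and a load where the PHI lived. The sketch below is a deliberately naive toy model over textual instructions; it ignores terminator placement and exceptional edges, which the real pass has to handle carefully.

    #include <map>
    #include <string>
    #include <vector>

    // A toy IR: a block is a list of textual instructions, and a "PHI" is a
    // set of (predecessor, value) pairs.
    using Block = std::vector<std::string>;

    static void demotePhi(std::map<std::string, Block> &Fn,
                          const std::string &PadBlock,
                          const std::map<std::string, std::string> &Incoming) {
      const std::string Slot = PadBlock + ".spill.slot";
      // Store the incoming value in every predecessor...
      for (const auto &PV : Incoming)
        Fn[PV.first].push_back("store " + PV.second + " -> " + Slot);
      // ...and reload it where the PHI used to be.
      Fn[PadBlock].insert(Fn[PadBlock].begin(), "load " + Slot);
    }

    int main() {
      std::map<std::string, Block> Fn;
      demotePhi(Fn, "catch.dispatch", {{"invoke.cont", "%a"}, {"loop.latch", "%b"}});
      return 0;
    }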
- VMap[Inst] = ConstantInt::get(SelectorIDType, 0); - return CloningDirector::SkipInstruction; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleIndirectBr( - ValueToValueMapTy &VMap, - const IndirectBrInst *IBr, - BasicBlock *NewBB) { - // No special handling is required for cleanup cloning. - return CloningDirector::CloneInstruction; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke( - ValueToValueMapTy &VMap, const InvokeInst *Invoke, BasicBlock *NewBB) { - // All invokes in cleanup handlers can be replaced with calls. - SmallVector<Value *, 16> CallArgs(Invoke->op_begin(), Invoke->op_end() - 3); - // Insert a normal call instruction... - CallInst *NewCall = - CallInst::Create(const_cast<Value *>(Invoke->getCalledValue()), CallArgs, - Invoke->getName(), NewBB); - NewCall->setCallingConv(Invoke->getCallingConv()); - NewCall->setAttributes(Invoke->getAttributes()); - NewCall->setDebugLoc(Invoke->getDebugLoc()); - VMap[Invoke] = NewCall; - - // Remap the operands. - llvm::RemapInstruction(NewCall, VMap, RF_None, nullptr, &Materializer); - - // Insert an unconditional branch to the normal destination. - BranchInst::Create(Invoke->getNormalDest(), NewBB); - - // The unwind destination won't be cloned into the new function, so - // we don't need to clean up its phi nodes. - - // We just added a terminator to the cloned block. - // Tell the caller to stop processing the current basic block. - return CloningDirector::CloneSuccessors; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleResume( - ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) { - ReturnInst::Create(NewBB->getContext(), nullptr, NewBB); - - // We just added a terminator to the cloned block. - // Tell the caller to stop processing the current basic block so that - // the branch instruction will be skipped. - return CloningDirector::StopCloningBB; -} - -CloningDirector::CloningAction -WinEHCleanupDirector::handleCompare(ValueToValueMapTy &VMap, - const CmpInst *Compare, BasicBlock *NewBB) { - if (match(Compare->getOperand(0), m_Intrinsic<Intrinsic::eh_typeid_for>()) || - match(Compare->getOperand(1), m_Intrinsic<Intrinsic::eh_typeid_for>())) { - VMap[Compare] = ConstantInt::get(SelectorIDType, 1); - return CloningDirector::SkipInstruction; + for (auto *PN : PHINodes) { + // There may be lingering uses on other EH PHIs being removed + PN->replaceAllUsesWith(UndefValue::get(PN->getType())); + PN->eraseFromParent(); } - return CloningDirector::CloneInstruction; -} - -WinEHFrameVariableMaterializer::WinEHFrameVariableMaterializer( - Function *OutlinedFn, Value *ParentFP, FrameVarInfoMap &FrameVarInfo) - : FrameVarInfo(FrameVarInfo), Builder(OutlinedFn->getContext()) { - BasicBlock *EntryBB = &OutlinedFn->getEntryBlock(); - - // New allocas should be inserted in the entry block, but after the parent FP - // is established if it is an instruction. - Instruction *InsertPoint = EntryBB->getFirstInsertionPt(); - if (auto *FPInst = dyn_cast<Instruction>(ParentFP)) - InsertPoint = FPInst->getNextNode(); - Builder.SetInsertPoint(EntryBB, InsertPoint); } -Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) { - // If we're asked to materialize a static alloca, we temporarily create an - // alloca in the outlined function and add this to the FrameVarInfo map. When - // all the outlining is complete, we'll replace these temporary allocas with - // calls to llvm.localrecover. 
- if (auto *AV = dyn_cast<AllocaInst>(V)) { - assert(AV->isStaticAlloca() && - "cannot materialize un-demoted dynamic alloca"); - AllocaInst *NewAlloca = dyn_cast<AllocaInst>(AV->clone()); - Builder.Insert(NewAlloca, AV->getName()); - FrameVarInfo[AV].push_back(NewAlloca); - return NewAlloca; - } - - if (isa<Instruction>(V) || isa<Argument>(V)) { - Function *Parent = isa<Instruction>(V) - ? cast<Instruction>(V)->getParent()->getParent() - : cast<Argument>(V)->getParent(); - errs() - << "Failed to demote instruction used in exception handler of function " - << GlobalValue::getRealLinkageName(Parent->getName()) << ":\n"; - errs() << " " << *V << '\n'; - report_fatal_error("WinEHPrepare failed to demote instruction"); - } - - // Don't materialize other values. - return nullptr; -} +void WinEHPrepare::cloneCommonBlocks(Function &F) { + // We need to clone all blocks which belong to multiple funclets. Values are + // remapped throughout the funclet to propogate both the new instructions + // *and* the new basic blocks themselves. + for (auto &Funclets : FuncletBlocks) { + BasicBlock *FuncletPadBB = Funclets.first; + std::vector<BasicBlock *> &BlocksInFunclet = Funclets.second; + + std::vector<std::pair<BasicBlock *, BasicBlock *>> Orig2Clone; + ValueToValueMapTy VMap; + for (BasicBlock *BB : BlocksInFunclet) { + ColorVector &ColorsForBB = BlockColors[BB]; + // We don't need to do anything if the block is monochromatic. + size_t NumColorsForBB = ColorsForBB.size(); + if (NumColorsForBB == 1) + continue; -void WinEHFrameVariableMaterializer::escapeCatchObject(Value *V) { - // Catch parameter objects have to live in the parent frame. When we see a use - // of a catch parameter, add a sentinel to the multimap to indicate that it's - // used from another handler. This will prevent us from trying to sink the - // alloca into the handler and ensure that the catch parameter is present in - // the call to llvm.localescape. - FrameVarInfo[V].push_back(getCatchObjectSentinel()); -} + DEBUG_WITH_TYPE("winehprepare-coloring", + dbgs() << " Cloning block \'" << BB->getName() + << "\' for funclet \'" << FuncletPadBB->getName() + << "\'.\n"); -// This function maps the catch and cleanup handlers that are reachable from the -// specified landing pad. The landing pad sequence will have this basic shape: -// -// <cleanup handler> -// <selector comparison> -// <catch handler> -// <cleanup handler> -// <selector comparison> -// <catch handler> -// <cleanup handler> -// ... -// -// Any of the cleanup slots may be absent. The cleanup slots may be occupied by -// any arbitrary control flow, but all paths through the cleanup code must -// eventually reach the next selector comparison and no path can skip to a -// different selector comparisons, though some paths may terminate abnormally. -// Therefore, we will use a depth first search from the start of any given -// cleanup block and stop searching when we find the next selector comparison. -// -// If the landingpad instruction does not have a catch clause, we will assume -// that any instructions other than selector comparisons and catch handlers can -// be ignored. In practice, these will only be the boilerplate instructions. -// -// The catch handlers may also have any control structure, but we are only -// interested in the start of the catch handlers, so we don't need to actually -// follow the flow of the catch handlers. 
The start of the catch handlers can -// be located from the compare instructions, but they can be skipped in the -// flow by following the contrary branch. -void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad, - LandingPadActions &Actions) { - unsigned int NumClauses = LPad->getNumClauses(); - unsigned int HandlersFound = 0; - BasicBlock *BB = LPad->getParent(); - - DEBUG(dbgs() << "Mapping landing pad: " << BB->getName() << "\n"); - - if (NumClauses == 0) { - findCleanupHandlers(Actions, BB, nullptr); - return; - } + // Create a new basic block and copy instructions into it! + BasicBlock *CBB = + CloneBasicBlock(BB, VMap, Twine(".for.", FuncletPadBB->getName())); + // Insert the clone immediately after the original to ensure determinism + // and to keep the same relative ordering of any funclet's blocks. + CBB->insertInto(&F, BB->getNextNode()); - VisitedBlockSet VisitedBlocks; + // Add basic block mapping. + VMap[BB] = CBB; - while (HandlersFound != NumClauses) { - BasicBlock *NextBB = nullptr; + // Record delta operations that we need to perform to our color mappings. + Orig2Clone.emplace_back(BB, CBB); + } - // Skip over filter clauses. - if (LPad->isFilter(HandlersFound)) { - ++HandlersFound; + // If nothing was cloned, we're done cloning in this funclet. + if (Orig2Clone.empty()) continue; + + // Update our color mappings to reflect that one block has lost a color and + // another has gained a color. + for (auto &BBMapping : Orig2Clone) { + BasicBlock *OldBlock = BBMapping.first; + BasicBlock *NewBlock = BBMapping.second; + + BlocksInFunclet.push_back(NewBlock); + ColorVector &NewColors = BlockColors[NewBlock]; + assert(NewColors.empty() && "A new block should only have one color!"); + NewColors.push_back(FuncletPadBB); + + DEBUG_WITH_TYPE("winehprepare-coloring", + dbgs() << " Assigned color \'" << FuncletPadBB->getName() + << "\' to block \'" << NewBlock->getName() + << "\'.\n"); + + BlocksInFunclet.erase( + std::remove(BlocksInFunclet.begin(), BlocksInFunclet.end(), OldBlock), + BlocksInFunclet.end()); + ColorVector &OldColors = BlockColors[OldBlock]; + OldColors.erase( + std::remove(OldColors.begin(), OldColors.end(), FuncletPadBB), + OldColors.end()); + + DEBUG_WITH_TYPE("winehprepare-coloring", + dbgs() << " Removed color \'" << FuncletPadBB->getName() + << "\' from block \'" << OldBlock->getName() + << "\'.\n"); } - // See if the clause we're looking for is a catch-all. - // If so, the catch begins immediately. - Constant *ExpectedSelector = - LPad->getClause(HandlersFound)->stripPointerCasts(); - if (isa<ConstantPointerNull>(ExpectedSelector)) { - // The catch all must occur last. - assert(HandlersFound == NumClauses - 1); - - // There can be additional selector dispatches in the call chain that we - // need to ignore. - BasicBlock *CatchBlock = nullptr; - Constant *Selector; - while (BB && isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) { - DEBUG(dbgs() << " Found extra catch dispatch in block " - << CatchBlock->getName() << "\n"); - BB = NextBB; + // Loop over all of the instructions in this funclet, fixing up operand + // references as we go. This uses VMap to do all the hard work. + for (BasicBlock *BB : BlocksInFunclet) + // Loop over all instructions, fixing each one as we find it... 
+ for (Instruction &I : *BB) + RemapInstruction(&I, VMap, + RF_IgnoreMissingEntries | RF_NoModuleLevelChanges); + + auto UpdatePHIOnClonedBlock = [&](PHINode *PN, bool IsForOldBlock) { + unsigned NumPreds = PN->getNumIncomingValues(); + for (unsigned PredIdx = 0, PredEnd = NumPreds; PredIdx != PredEnd; + ++PredIdx) { + BasicBlock *IncomingBlock = PN->getIncomingBlock(PredIdx); + ColorVector &IncomingColors = BlockColors[IncomingBlock]; + bool BlockInFunclet = IncomingColors.size() == 1 && + IncomingColors.front() == FuncletPadBB; + if (IsForOldBlock != BlockInFunclet) + continue; + PN->removeIncomingValue(IncomingBlock, /*DeletePHIIfEmpty=*/false); + // Revisit the next entry. + --PredIdx; + --PredEnd; } - - // Add the catch handler to the action list. - CatchHandler *Action = nullptr; - if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) { - // If the CatchHandlerMap already has an entry for this BB, re-use it. - Action = CatchHandlerMap[BB]; - assert(Action->getSelector() == ExpectedSelector); - } else { - // We don't expect a selector dispatch, but there may be a call to - // llvm.eh.begincatch, which separates catch handling code from - // cleanup code in the same control flow. This call looks for the - // begincatch intrinsic. - Action = findCatchHandler(BB, NextBB, VisitedBlocks); - if (Action) { - // For C++ EH, check if there is any interesting cleanup code before - // we begin the catch. This is important because cleanups cannot - // rethrow exceptions but code called from catches can. For SEH, it - // isn't important if some finally code before a catch-all is executed - // out of line or after recovering from the exception. - if (Personality == EHPersonality::MSVC_CXX) - findCleanupHandlers(Actions, BB, BB); - } else { - // If an action was not found, it means that the control flows - // directly into the catch-all handler and there is no cleanup code. - // That's an expected situation and we must create a catch action. - // Since this is a catch-all handler, the selector won't actually - // appear in the code anywhere. ExpectedSelector here is the constant - // null ptr that we got from the landing pad instruction. - Action = new CatchHandler(BB, ExpectedSelector, nullptr); - CatchHandlerMap[BB] = Action; - } + }; + + for (auto &BBMapping : Orig2Clone) { + BasicBlock *OldBlock = BBMapping.first; + BasicBlock *NewBlock = BBMapping.second; + for (Instruction &OldI : *OldBlock) { + auto *OldPN = dyn_cast<PHINode>(&OldI); + if (!OldPN) + break; + UpdatePHIOnClonedBlock(OldPN, /*IsForOldBlock=*/true); } - Actions.insertCatchHandler(Action); - DEBUG(dbgs() << " Catch all handler at block " << BB->getName() << "\n"); - ++HandlersFound; - - // Once we reach a catch-all, don't expect to hit a resume instruction. - BB = nullptr; - break; - } - - CatchHandler *CatchAction = findCatchHandler(BB, NextBB, VisitedBlocks); - assert(CatchAction); - - // See if there is any interesting code executed before the dispatch. - findCleanupHandlers(Actions, BB, CatchAction->getStartBlock()); - - // When the source program contains multiple nested try blocks the catch - // handlers can get strung together in such a way that we can encounter - // a dispatch for a selector that we've already had a handler for. - if (CatchAction->getSelector()->stripPointerCasts() == ExpectedSelector) { - ++HandlersFound; - - // Add the catch handler to the action list. 
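The cloning loop above keeps two maps in sync: BlockColors, from a block to the funclets it belongs to, and FuncletBlocks, from a funclet to its blocks. When a block with more than one color is cloned for a funclet, the clone gets exactly that one color and the original loses it. A standalone model of that delta update, with strings standing in for blocks and funclet pads:

    #include <algorithm>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      // A block reachable from two funclets carries two colors.
      std::map<std::string, std::vector<std::string>> BlockColors = {
          {"shared.bb", {"cleanup.pad", "catch.pad"}}};

      std::string Funclet = "catch.pad";
      std::string Old = "shared.bb";
      std::string Clone = Old + ".for." + Funclet;

      // The clone belongs only to the funclet it was made for.
      BlockColors[Clone] = {Funclet};

      // The original block no longer belongs to that funclet.
      auto &OldColors = BlockColors[Old];
      OldColors.erase(std::remove(OldColors.begin(), OldColors.end(), Funclet),
                      OldColors.end());
      // OldColors is now {"cleanup.pad"} and the clone carries "catch.pad".
      return 0;
    }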
- DEBUG(dbgs() << " Found catch dispatch in block " - << CatchAction->getStartBlock()->getName() << "\n"); - Actions.insertCatchHandler(CatchAction); - } else { - // Under some circumstances optimized IR will flow unconditionally into a - // handler block without checking the selector. This can only happen if - // the landing pad has a catch-all handler and the handler for the - // preceeding catch clause is identical to the catch-call handler - // (typically an empty catch). In this case, the handler must be shared - // by all remaining clauses. - if (isa<ConstantPointerNull>( - CatchAction->getSelector()->stripPointerCasts())) { - DEBUG(dbgs() << " Applying early catch-all handler in block " - << CatchAction->getStartBlock()->getName() - << " to all remaining clauses.\n"); - Actions.insertCatchHandler(CatchAction); - return; + for (Instruction &NewI : *NewBlock) { + auto *NewPN = dyn_cast<PHINode>(&NewI); + if (!NewPN) + break; + UpdatePHIOnClonedBlock(NewPN, /*IsForOldBlock=*/false); } - - DEBUG(dbgs() << " Found extra catch dispatch in block " - << CatchAction->getStartBlock()->getName() << "\n"); } - // Move on to the block after the catch handler. - BB = NextBB; - } - - // If we didn't wind up in a catch-all, see if there is any interesting code - // executed before the resume. - findCleanupHandlers(Actions, BB, BB); - - // It's possible that some optimization moved code into a landingpad that - // wasn't - // previously being used for cleanup. If that happens, we need to execute - // that - // extra code from a cleanup handler. - if (Actions.includesCleanup() && !LPad->isCleanup()) - LPad->setCleanup(true); -} - -// This function searches starting with the input block for the next -// block that terminates with a branch whose condition is based on a selector -// comparison. This may be the input block. See the mapLandingPadBlocks -// comments for a discussion of control flow assumptions. -// -CatchHandler *WinEHPrepare::findCatchHandler(BasicBlock *BB, - BasicBlock *&NextBB, - VisitedBlockSet &VisitedBlocks) { - // See if we've already found a catch handler use it. - // Call count() first to avoid creating a null entry for blocks - // we haven't seen before. - if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) { - CatchHandler *Action = cast<CatchHandler>(CatchHandlerMap[BB]); - NextBB = Action->getNextBB(); - return Action; - } + // Check to see if SuccBB has PHI nodes. If so, we need to add entries to + // the PHI nodes for NewBB now. + for (auto &BBMapping : Orig2Clone) { + BasicBlock *OldBlock = BBMapping.first; + BasicBlock *NewBlock = BBMapping.second; + for (BasicBlock *SuccBB : successors(NewBlock)) { + for (Instruction &SuccI : *SuccBB) { + auto *SuccPN = dyn_cast<PHINode>(&SuccI); + if (!SuccPN) + break; + + // Ok, we have a PHI node. Figure out what the incoming value was for + // the OldBlock. + int OldBlockIdx = SuccPN->getBasicBlockIndex(OldBlock); + if (OldBlockIdx == -1) + break; + Value *IV = SuccPN->getIncomingValue(OldBlockIdx); + + // Remap the value if necessary. + if (auto *Inst = dyn_cast<Instruction>(IV)) { + ValueToValueMapTy::iterator I = VMap.find(Inst); + if (I != VMap.end()) + IV = I->second; + } - // VisitedBlocks applies only to the current search. We still - // need to consider blocks that we've visited while mapping other - // landing pads. 
- VisitedBlocks.insert(BB); - - BasicBlock *CatchBlock = nullptr; - Constant *Selector = nullptr; - - // If this is the first time we've visited this block from any landing pad - // look to see if it is a selector dispatch block. - if (!CatchHandlerMap.count(BB)) { - if (isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) { - CatchHandler *Action = new CatchHandler(BB, Selector, NextBB); - CatchHandlerMap[BB] = Action; - return Action; - } - // If we encounter a block containing an llvm.eh.begincatch before we - // find a selector dispatch block, the handler is assumed to be - // reached unconditionally. This happens for catch-all blocks, but - // it can also happen for other catch handlers that have been combined - // with the catch-all handler during optimization. - if (isCatchBlock(BB)) { - PointerType *Int8PtrTy = Type::getInt8PtrTy(BB->getContext()); - Constant *NullSelector = ConstantPointerNull::get(Int8PtrTy); - CatchHandler *Action = new CatchHandler(BB, NullSelector, nullptr); - CatchHandlerMap[BB] = Action; - return Action; + SuccPN->addIncoming(IV, NewBlock); + } + } } - } - // Visit each successor, looking for the dispatch. - // FIXME: We expect to find the dispatch quickly, so this will probably - // work better as a breadth first search. - for (BasicBlock *Succ : successors(BB)) { - if (VisitedBlocks.count(Succ)) - continue; + for (ValueToValueMapTy::value_type VT : VMap) { + // If there were values defined in BB that are used outside the funclet, + // then we now have to update all uses of the value to use either the + // original value, the cloned value, or some PHI derived value. This can + // require arbitrary PHI insertion, of which we are prepared to do, clean + // these up now. + SmallVector<Use *, 16> UsesToRename; - CatchHandler *Action = findCatchHandler(Succ, NextBB, VisitedBlocks); - if (Action) - return Action; - } - return nullptr; -} - -// These are helper functions to combine repeated code from findCleanupHandlers. -static void createCleanupHandler(LandingPadActions &Actions, - CleanupHandlerMapTy &CleanupHandlerMap, - BasicBlock *BB) { - CleanupHandler *Action = new CleanupHandler(BB); - CleanupHandlerMap[BB] = Action; - Actions.insertCleanupHandler(Action); - DEBUG(dbgs() << " Found cleanup code in block " - << Action->getStartBlock()->getName() << "\n"); -} - -static CallSite matchOutlinedFinallyCall(BasicBlock *BB, - Instruction *MaybeCall) { - // Look for finally blocks that Clang has already outlined for us. - // %fp = call i8* @llvm.localaddress() - // call void @"fin$parent"(iN 1, i8* %fp) - if (isLocalAddressCall(MaybeCall) && MaybeCall != BB->getTerminator()) - MaybeCall = MaybeCall->getNextNode(); - CallSite FinallyCall(MaybeCall); - if (!FinallyCall || FinallyCall.arg_size() != 2) - return CallSite(); - if (!match(FinallyCall.getArgument(0), m_SpecificInt(1))) - return CallSite(); - if (!isLocalAddressCall(FinallyCall.getArgument(1))) - return CallSite(); - return FinallyCall; -} - -static BasicBlock *followSingleUnconditionalBranches(BasicBlock *BB) { - // Skip single ubr blocks. - while (BB->getFirstNonPHIOrDbg() == BB->getTerminator()) { - auto *Br = dyn_cast<BranchInst>(BB->getTerminator()); - if (Br && Br->isUnconditional()) - BB = Br->getSuccessor(0); - else - return BB; - } - return BB; -} - -// This function searches starting with the input block for the next block that -// contains code that is not part of a catch handler and would not be eliminated -// during handler outlining. 
-// -void WinEHPrepare::findCleanupHandlers(LandingPadActions &Actions, - BasicBlock *StartBB, BasicBlock *EndBB) { - // Here we will skip over the following: - // - // landing pad prolog: - // - // Unconditional branches - // - // Selector dispatch - // - // Resume pattern - // - // Anything else marks the start of an interesting block - - BasicBlock *BB = StartBB; - // Anything other than an unconditional branch will kick us out of this loop - // one way or another. - while (BB) { - BB = followSingleUnconditionalBranches(BB); - // If we've already scanned this block, don't scan it again. If it is - // a cleanup block, there will be an action in the CleanupHandlerMap. - // If we've scanned it and it is not a cleanup block, there will be a - // nullptr in the CleanupHandlerMap. If we have not scanned it, there will - // be no entry in the CleanupHandlerMap. We must call count() first to - // avoid creating a null entry for blocks we haven't scanned. - if (CleanupHandlerMap.count(BB)) { - if (auto *Action = CleanupHandlerMap[BB]) { - Actions.insertCleanupHandler(Action); - DEBUG(dbgs() << " Found cleanup code in block " - << Action->getStartBlock()->getName() << "\n"); - // FIXME: This cleanup might chain into another, and we need to discover - // that. - return; - } else { - // Here we handle the case where the cleanup handler map contains a - // value for this block but the value is a nullptr. This means that - // we have previously analyzed the block and determined that it did - // not contain any cleanup code. Based on the earlier analysis, we - // know the block must end in either an unconditional branch, a - // resume or a conditional branch that is predicated on a comparison - // with a selector. Either the resume or the selector dispatch - // would terminate the search for cleanup code, so the unconditional - // branch is the only case for which we might need to continue - // searching. - BasicBlock *SuccBB = followSingleUnconditionalBranches(BB); - if (SuccBB == BB || SuccBB == EndBB) - return; - BB = SuccBB; + auto *OldI = dyn_cast<Instruction>(const_cast<Value *>(VT.first)); + if (!OldI) continue; + auto *NewI = cast<Instruction>(VT.second); + // Scan all uses of this instruction to see if it is used outside of its + // funclet, and if so, record them in UsesToRename. + for (Use &U : OldI->uses()) { + Instruction *UserI = cast<Instruction>(U.getUser()); + BasicBlock *UserBB = UserI->getParent(); + ColorVector &ColorsForUserBB = BlockColors[UserBB]; + assert(!ColorsForUserBB.empty()); + if (ColorsForUserBB.size() > 1 || + *ColorsForUserBB.begin() != FuncletPadBB) + UsesToRename.push_back(&U); } - } - // Create an entry in the cleanup handler map for this block. Initially - // we create an entry that says this isn't a cleanup block. If we find - // cleanup code, the caller will replace this entry. - CleanupHandlerMap[BB] = nullptr; + // If there are no uses outside the block, we're done with this + // instruction. + if (UsesToRename.empty()) + continue; - TerminatorInst *Terminator = BB->getTerminator(); + // We found a use of OldI outside of the funclet. Rename all uses of OldI + // that are outside its funclet to be uses of the appropriate PHI node + // etc. + SSAUpdater SSAUpdate; + SSAUpdate.Initialize(OldI->getType(), OldI->getName()); + SSAUpdate.AddAvailableValue(OldI->getParent(), OldI); + SSAUpdate.AddAvailableValue(NewI->getParent(), NewI); - // Landing pad blocks have extra instructions we need to accept. 
- LandingPadMap *LPadMap = nullptr; - if (BB->isLandingPad()) { - LandingPadInst *LPad = BB->getLandingPadInst(); - LPadMap = &LPadMaps[LPad]; - if (!LPadMap->isInitialized()) - LPadMap->mapLandingPad(LPad); + while (!UsesToRename.empty()) + SSAUpdate.RewriteUseAfterInsertions(*UsesToRename.pop_back_val()); } + } +} - // Look for the bare resume pattern: - // %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn, 0 - // %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel, 1 - // resume { i8*, i32 } %lpad.val2 - if (auto *Resume = dyn_cast<ResumeInst>(Terminator)) { - InsertValueInst *Insert1 = nullptr; - InsertValueInst *Insert2 = nullptr; - Value *ResumeVal = Resume->getOperand(0); - // If the resume value isn't a phi or landingpad value, it should be a - // series of insertions. Identify them so we can avoid them when scanning - // for cleanups. - if (!isa<PHINode>(ResumeVal) && !isa<LandingPadInst>(ResumeVal)) { - Insert2 = dyn_cast<InsertValueInst>(ResumeVal); - if (!Insert2) - return createCleanupHandler(Actions, CleanupHandlerMap, BB); - Insert1 = dyn_cast<InsertValueInst>(Insert2->getAggregateOperand()); - if (!Insert1) - return createCleanupHandler(Actions, CleanupHandlerMap, BB); - } - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); - II != IE; ++II) { - Instruction *Inst = II; - if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst)) - continue; - if (Inst == Insert1 || Inst == Insert2 || Inst == Resume) +void WinEHPrepare::removeImplausibleInstructions(Function &F) { + // Remove implausible terminators and replace them with UnreachableInst. + for (auto &Funclet : FuncletBlocks) { + BasicBlock *FuncletPadBB = Funclet.first; + std::vector<BasicBlock *> &BlocksInFunclet = Funclet.second; + Instruction *FirstNonPHI = FuncletPadBB->getFirstNonPHI(); + auto *FuncletPad = dyn_cast<FuncletPadInst>(FirstNonPHI); + auto *CatchPad = dyn_cast_or_null<CatchPadInst>(FuncletPad); + auto *CleanupPad = dyn_cast_or_null<CleanupPadInst>(FuncletPad); + + for (BasicBlock *BB : BlocksInFunclet) { + for (Instruction &I : *BB) { + CallSite CS(&I); + if (!CS) continue; - if (!Inst->hasOneUse() || - (Inst->user_back() != Insert1 && Inst->user_back() != Insert2)) { - return createCleanupHandler(Actions, CleanupHandlerMap, BB); - } - } - return; - } - BranchInst *Branch = dyn_cast<BranchInst>(Terminator); - if (Branch && Branch->isConditional()) { - // Look for the selector dispatch. - // %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*)) - // %matches = icmp eq i32 %sel, %2 - // br i1 %matches, label %catch14, label %eh.resume - CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition()); - if (!Compare || !Compare->isEquality()) - return createCleanupHandler(Actions, CleanupHandlerMap, BB); - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); - II != IE; ++II) { - Instruction *Inst = II; - if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst)) - continue; - if (Inst == Compare || Inst == Branch) + Value *FuncletBundleOperand = nullptr; + if (auto BU = CS.getOperandBundle(LLVMContext::OB_funclet)) + FuncletBundleOperand = BU->Inputs.front(); + + if (FuncletBundleOperand == FuncletPad) continue; - if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) + + // Skip call sites which are nounwind intrinsics. 
+ auto *CalledFn = + dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); + if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow()) continue; - return createCleanupHandler(Actions, CleanupHandlerMap, BB); - } - // The selector dispatch block should always terminate our search. - assert(BB == EndBB); - return; - } - if (isAsynchronousEHPersonality(Personality)) { - // If this is a landingpad block, split the block at the first non-landing - // pad instruction. - Instruction *MaybeCall = BB->getFirstNonPHIOrDbg(); - if (LPadMap) { - while (MaybeCall != BB->getTerminator() && - LPadMap->isLandingPadSpecificInst(MaybeCall)) - MaybeCall = MaybeCall->getNextNode(); + // This call site was not part of this funclet, remove it. + if (CS.isInvoke()) { + // Remove the unwind edge if it was an invoke. + removeUnwindEdge(BB); + // Get a pointer to the new call. + BasicBlock::iterator CallI = + std::prev(BB->getTerminator()->getIterator()); + auto *CI = cast<CallInst>(&*CallI); + changeToUnreachable(CI, /*UseLLVMTrap=*/false); + } else { + changeToUnreachable(&I, /*UseLLVMTrap=*/false); + } + + // There are no more instructions in the block (except for unreachable), + // we are done. + break; } - // Look for outlined finally calls on x64, since those happen to match the - // prototype provided by the runtime. - if (TheTriple.getArch() == Triple::x86_64) { - if (CallSite FinallyCall = matchOutlinedFinallyCall(BB, MaybeCall)) { - Function *Fin = FinallyCall.getCalledFunction(); - assert(Fin && "outlined finally call should be direct"); - auto *Action = new CleanupHandler(BB); - Action->setHandlerBlockOrFunc(Fin); - Actions.insertCleanupHandler(Action); - CleanupHandlerMap[BB] = Action; - DEBUG(dbgs() << " Found frontend-outlined finally call to " - << Fin->getName() << " in block " - << Action->getStartBlock()->getName() << "\n"); - - // Split the block if there were more interesting instructions and - // look for finally calls in the normal successor block. - BasicBlock *SuccBB = BB; - if (FinallyCall.getInstruction() != BB->getTerminator() && - FinallyCall.getInstruction()->getNextNode() != - BB->getTerminator()) { - SuccBB = - SplitBlock(BB, FinallyCall.getInstruction()->getNextNode(), DT); - } else { - if (FinallyCall.isInvoke()) { - SuccBB = cast<InvokeInst>(FinallyCall.getInstruction()) - ->getNormalDest(); - } else { - SuccBB = BB->getUniqueSuccessor(); - assert(SuccBB && - "splitOutlinedFinallyCalls didn't insert a branch"); - } - } - BB = SuccBB; - if (BB == EndBB) - return; - continue; + TerminatorInst *TI = BB->getTerminator(); + // CatchPadInst and CleanupPadInst can't transfer control to a ReturnInst. + bool IsUnreachableRet = isa<ReturnInst>(TI) && FuncletPad; + // The token consumed by a CatchReturnInst must match the funclet token. + bool IsUnreachableCatchret = false; + if (auto *CRI = dyn_cast<CatchReturnInst>(TI)) + IsUnreachableCatchret = CRI->getCatchPad() != CatchPad; + // The token consumed by a CleanupReturnInst must match the funclet token. 
+ bool IsUnreachableCleanupret = false; + if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) + IsUnreachableCleanupret = CRI->getCleanupPad() != CleanupPad; + if (IsUnreachableRet || IsUnreachableCatchret || + IsUnreachableCleanupret) { + changeToUnreachable(TI, /*UseLLVMTrap=*/false); + } else if (isa<InvokeInst>(TI)) { + if (Personality == EHPersonality::MSVC_CXX && CleanupPad) { + // Invokes within a cleanuppad for the MSVC++ personality never + // transfer control to their unwind edge: the personality will + // terminate the program. + removeUnwindEdge(BB); } } } + } +} - // Anything else is either a catch block or interesting cleanup code. - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); - II != IE; ++II) { - Instruction *Inst = II; - if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst)) - continue; - // Unconditional branches fall through to this loop. - if (Inst == Branch) - continue; - // If this is a catch block, there is no cleanup code to be found. - if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>())) - return; - // If this a nested landing pad, it may contain an endcatch call. - if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>())) - return; - // Anything else makes this interesting cleanup code. - return createCleanupHandler(Actions, CleanupHandlerMap, BB); - } - - // Only unconditional branches in empty blocks should get this far. - assert(Branch && Branch->isUnconditional()); - if (BB == EndBB) - return; - BB = Branch->getSuccessor(0); +void WinEHPrepare::cleanupPreparedFunclets(Function &F) { + // Clean-up some of the mess we made by removing useles PHI nodes, trivial + // branches, etc. + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) { + BasicBlock *BB = &*FI++; + SimplifyInstructionsInBlock(BB); + ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true); + MergeBlockIntoPredecessor(BB); } + + // We might have some unreachable blocks after cleaning up some impossible + // control flow. + removeUnreachableBlocks(F); } -// This is a public function, declared in WinEHFuncInfo.h and is also -// referenced by WinEHNumbering in FunctionLoweringInfo.cpp. -void llvm::parseEHActions( - const IntrinsicInst *II, - SmallVectorImpl<std::unique_ptr<ActionHandler>> &Actions) { - assert(II->getIntrinsicID() == Intrinsic::eh_actions && - "attempted to parse non eh.actions intrinsic"); - for (unsigned I = 0, E = II->getNumArgOperands(); I != E;) { - uint64_t ActionKind = - cast<ConstantInt>(II->getArgOperand(I))->getZExtValue(); - if (ActionKind == /*catch=*/1) { - auto *Selector = cast<Constant>(II->getArgOperand(I + 1)); - ConstantInt *EHObjIndex = cast<ConstantInt>(II->getArgOperand(I + 2)); - int64_t EHObjIndexVal = EHObjIndex->getSExtValue(); - Constant *Handler = cast<Constant>(II->getArgOperand(I + 3)); - I += 4; - auto CH = make_unique<CatchHandler>(/*BB=*/nullptr, Selector, - /*NextBB=*/nullptr); - CH->setHandlerBlockOrFunc(Handler); - CH->setExceptionVarIndex(EHObjIndexVal); - Actions.push_back(std::move(CH)); - } else if (ActionKind == 0) { - Constant *Handler = cast<Constant>(II->getArgOperand(I + 1)); - I += 2; - auto CH = make_unique<CleanupHandler>(/*BB=*/nullptr); - CH->setHandlerBlockOrFunc(Handler); - Actions.push_back(std::move(CH)); - } else { - llvm_unreachable("Expected either a catch or cleanup handler!"); +void WinEHPrepare::verifyPreparedFunclets(Function &F) { + // Recolor the CFG to verify that all is well. 
+ for (BasicBlock &BB : F) { + size_t NumColors = BlockColors[&BB].size(); + assert(NumColors == 1 && "Expected monochromatic BB!"); + if (NumColors == 0) + report_fatal_error("Uncolored BB!"); + if (NumColors > 1) + report_fatal_error("Multicolor BB!"); + if (!DisableDemotion) { + bool EHPadHasPHI = BB.isEHPad() && isa<PHINode>(BB.begin()); + assert(!EHPadHasPHI && "EH Pad still has a PHI!"); + if (EHPadHasPHI) + report_fatal_error("EH Pad still has a PHI!"); } } - std::reverse(Actions.begin(), Actions.end()); } -namespace { -struct WinEHNumbering { - WinEHNumbering(WinEHFuncInfo &FuncInfo) : FuncInfo(FuncInfo), - CurrentBaseState(-1), NextState(0) {} +bool WinEHPrepare::prepareExplicitEH(Function &F) { + // Remove unreachable blocks. It is not valuable to assign them a color and + // their existence can trick us into thinking values are alive when they are + // not. + removeUnreachableBlocks(F); - WinEHFuncInfo &FuncInfo; - int CurrentBaseState; - int NextState; + // Determine which blocks are reachable from which funclet entries. + colorFunclets(F); - SmallVector<std::unique_ptr<ActionHandler>, 4> HandlerStack; - SmallPtrSet<const Function *, 4> VisitedHandlers; + cloneCommonBlocks(F); - int currentEHNumber() const { - return HandlerStack.empty() ? CurrentBaseState : HandlerStack.back()->getEHState(); - } + if (!DisableDemotion) + demotePHIsOnFunclets(F); - void createUnwindMapEntry(int ToState, ActionHandler *AH); - void createTryBlockMapEntry(int TryLow, int TryHigh, - ArrayRef<CatchHandler *> Handlers); - void processCallSite(MutableArrayRef<std::unique_ptr<ActionHandler>> Actions, - ImmutableCallSite CS); - void popUnmatchedActions(int FirstMismatch); - void calculateStateNumbers(const Function &F); - void findActionRootLPads(const Function &F); -}; -} + if (!DisableCleanups) { + removeImplausibleInstructions(F); -void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) { - WinEHUnwindMapEntry UME; - UME.ToState = ToState; - if (auto *CH = dyn_cast_or_null<CleanupHandler>(AH)) - UME.Cleanup = cast<Function>(CH->getHandlerBlockOrFunc()); - else - UME.Cleanup = nullptr; - FuncInfo.UnwindMap.push_back(UME); -} - -void WinEHNumbering::createTryBlockMapEntry(int TryLow, int TryHigh, - ArrayRef<CatchHandler *> Handlers) { - // See if we already have an entry for this set of handlers. - // This is using iterators rather than a range-based for loop because - // if we find the entry we're looking for we'll need the iterator to erase it. - int NumHandlers = Handlers.size(); - auto I = FuncInfo.TryBlockMap.begin(); - auto E = FuncInfo.TryBlockMap.end(); - for ( ; I != E; ++I) { - auto &Entry = *I; - if (Entry.HandlerArray.size() != (size_t)NumHandlers) - continue; - int N; - for (N = 0; N < NumHandlers; ++N) { - if (Entry.HandlerArray[N].Handler != Handlers[N]->getHandlerBlockOrFunc()) - break; // breaks out of inner loop - } - // If all the handlers match, this is what we were looking for. - if (N == NumHandlers) { - break; - } - } - - // If we found an existing entry for this set of handlers, extend the range - // but move the entry to the end of the map vector. The order of entries - // in the map is critical to the way that the runtime finds handlers. - // FIXME: Depending on what has happened with block ordering, this may - // incorrectly combine entries that should remain separate. - if (I != E) { - // Copy the existing entry. 
- WinEHTryBlockMapEntry Entry = *I; - Entry.TryLow = std::min(TryLow, Entry.TryLow); - Entry.TryHigh = std::max(TryHigh, Entry.TryHigh); - assert(Entry.TryLow <= Entry.TryHigh); - // Erase the old entry and add this one to the back. - FuncInfo.TryBlockMap.erase(I); - FuncInfo.TryBlockMap.push_back(Entry); - return; + cleanupPreparedFunclets(F); } - // If we didn't find an entry, create a new one. - WinEHTryBlockMapEntry TBME; - TBME.TryLow = TryLow; - TBME.TryHigh = TryHigh; - assert(TBME.TryLow <= TBME.TryHigh); - for (CatchHandler *CH : Handlers) { - WinEHHandlerType HT; - if (CH->getSelector()->isNullValue()) { - HT.Adjectives = 0x40; - HT.TypeDescriptor = nullptr; - } else { - auto *GV = cast<GlobalVariable>(CH->getSelector()->stripPointerCasts()); - // Selectors are always pointers to GlobalVariables with 'struct' type. - // The struct has two fields, adjectives and a type descriptor. - auto *CS = cast<ConstantStruct>(GV->getInitializer()); - HT.Adjectives = - cast<ConstantInt>(CS->getAggregateElement(0U))->getZExtValue(); - HT.TypeDescriptor = - cast<GlobalVariable>(CS->getAggregateElement(1)->stripPointerCasts()); - } - HT.Handler = cast<Function>(CH->getHandlerBlockOrFunc()); - HT.CatchObjRecoverIdx = CH->getExceptionVarIndex(); - TBME.HandlerArray.push_back(HT); - } - FuncInfo.TryBlockMap.push_back(TBME); -} + verifyPreparedFunclets(F); -static void print_name(const Value *V) { -#ifndef NDEBUG - if (!V) { - DEBUG(dbgs() << "null"); - return; - } + BlockColors.clear(); + FuncletBlocks.clear(); - if (const auto *F = dyn_cast<Function>(V)) - DEBUG(dbgs() << F->getName()); - else - DEBUG(V->dump()); -#endif + return true; } -void WinEHNumbering::processCallSite( - MutableArrayRef<std::unique_ptr<ActionHandler>> Actions, - ImmutableCallSite CS) { - DEBUG(dbgs() << "processCallSite (EH state = " << currentEHNumber() - << ") for: "); - print_name(CS ? CS.getCalledValue() : nullptr); - DEBUG(dbgs() << '\n'); - - DEBUG(dbgs() << "HandlerStack: \n"); - for (int I = 0, E = HandlerStack.size(); I < E; ++I) { - DEBUG(dbgs() << " "); - print_name(HandlerStack[I]->getHandlerBlockOrFunc()); - DEBUG(dbgs() << '\n'); - } - DEBUG(dbgs() << "Actions: \n"); - for (int I = 0, E = Actions.size(); I < E; ++I) { - DEBUG(dbgs() << " "); - print_name(Actions[I]->getHandlerBlockOrFunc()); - DEBUG(dbgs() << '\n'); - } - int FirstMismatch = 0; - for (int E = std::min(HandlerStack.size(), Actions.size()); FirstMismatch < E; - ++FirstMismatch) { - if (HandlerStack[FirstMismatch]->getHandlerBlockOrFunc() != - Actions[FirstMismatch]->getHandlerBlockOrFunc()) - break; - } - - // Remove unmatched actions from the stack and process their EH states. - popUnmatchedActions(FirstMismatch); - - DEBUG(dbgs() << "Pushing actions for CallSite: "); - print_name(CS ? CS.getCalledValue() : nullptr); - DEBUG(dbgs() << '\n'); - - bool LastActionWasCatch = false; - const LandingPadInst *LastRootLPad = nullptr; - for (size_t I = FirstMismatch; I != Actions.size(); ++I) { - // We can reuse eh states when pushing two catches for the same invoke. - bool CurrActionIsCatch = isa<CatchHandler>(Actions[I].get()); - auto *Handler = cast<Function>(Actions[I]->getHandlerBlockOrFunc()); - // Various conditions can lead to a handler being popped from the - // stack and re-pushed later. That shouldn't create a new state. - // FIXME: Can code optimization lead to re-used handlers? - if (FuncInfo.HandlerEnclosedState.count(Handler)) { - // If we already assigned the state enclosed by this handler re-use it. 
- Actions[I]->setEHState(FuncInfo.HandlerEnclosedState[Handler]); +// TODO: Share loads when one use dominates another, or when a catchpad exit +// dominates uses (needs dominators). +AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) { + BasicBlock *PHIBlock = PN->getParent(); + AllocaInst *SpillSlot = nullptr; + Instruction *EHPad = PHIBlock->getFirstNonPHI(); + + if (!isa<TerminatorInst>(EHPad)) { + // If the EHPad isn't a terminator, then we can insert a load in this block + // that will dominate all uses. + SpillSlot = new AllocaInst(PN->getType(), nullptr, + Twine(PN->getName(), ".wineh.spillslot"), + &F.getEntryBlock().front()); + Value *V = new LoadInst(SpillSlot, Twine(PN->getName(), ".wineh.reload"), + &*PHIBlock->getFirstInsertionPt()); + PN->replaceAllUsesWith(V); + return SpillSlot; + } + + // Otherwise, we have a PHI on a terminator EHPad, and we give up and insert + // loads of the slot before every use. + DenseMap<BasicBlock *, Value *> Loads; + for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end(); + UI != UE;) { + Use &U = *UI++; + auto *UsingInst = cast<Instruction>(U.getUser()); + if (isa<PHINode>(UsingInst) && UsingInst->getParent()->isEHPad()) { + // Use is on an EH pad phi. Leave it alone; we'll insert loads and + // stores for it separately. continue; } - const LandingPadInst* RootLPad = FuncInfo.RootLPad[Handler]; - if (CurrActionIsCatch && LastActionWasCatch && RootLPad == LastRootLPad) { - DEBUG(dbgs() << "setEHState for handler to " << currentEHNumber() << "\n"); - Actions[I]->setEHState(currentEHNumber()); - } else { - DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() << ", "); - print_name(Actions[I]->getHandlerBlockOrFunc()); - DEBUG(dbgs() << ") with EH state " << NextState << "\n"); - createUnwindMapEntry(currentEHNumber(), Actions[I].get()); - DEBUG(dbgs() << "setEHState for handler to " << NextState << "\n"); - Actions[I]->setEHState(NextState); - NextState++; - } - HandlerStack.push_back(std::move(Actions[I])); - LastActionWasCatch = CurrActionIsCatch; - LastRootLPad = RootLPad; + replaceUseWithLoad(PN, U, SpillSlot, Loads, F); } - - // This is used to defer numbering states for a handler until after the - // last time it appears in an invoke action list. - if (CS.isInvoke()) { - for (int I = 0, E = HandlerStack.size(); I < E; ++I) { - auto *Handler = cast<Function>(HandlerStack[I]->getHandlerBlockOrFunc()); - if (FuncInfo.LastInvoke[Handler] != cast<InvokeInst>(CS.getInstruction())) - continue; - FuncInfo.LastInvokeVisited[Handler] = true; - DEBUG(dbgs() << "Last invoke of "); - print_name(Handler); - DEBUG(dbgs() << " has been visited.\n"); - } - } - - DEBUG(dbgs() << "In EHState " << currentEHNumber() << " for CallSite: "); - print_name(CS ? CS.getCalledValue() : nullptr); - DEBUG(dbgs() << '\n'); + return SpillSlot; } -void WinEHNumbering::popUnmatchedActions(int FirstMismatch) { - // Don't recurse while we are looping over the handler stack. Instead, defer - // the numbering of the catch handlers until we are done popping. - SmallVector<CatchHandler *, 4> PoppedCatches; - for (int I = HandlerStack.size() - 1; I >= FirstMismatch; --I) { - std::unique_ptr<ActionHandler> Handler = HandlerStack.pop_back_val(); - if (isa<CatchHandler>(Handler.get())) - PoppedCatches.push_back(cast<CatchHandler>(Handler.release())); - } +// TODO: improve store placement. Inserting at def is probably good, but need +// to be careful not to introduce interfering stores (needs liveness analysis). 
+// TODO: identify related phi nodes that can share spill slots, and share them +// (also needs liveness). +void WinEHPrepare::insertPHIStores(PHINode *OriginalPHI, + AllocaInst *SpillSlot) { + // Use a worklist of (Block, Value) pairs -- the given Value needs to be + // stored to the spill slot by the end of the given Block. + SmallVector<std::pair<BasicBlock *, Value *>, 4> Worklist; - int TryHigh = NextState - 1; - int LastTryLowIdx = 0; - for (int I = 0, E = PoppedCatches.size(); I != E; ++I) { - CatchHandler *CH = PoppedCatches[I]; - DEBUG(dbgs() << "Popped handler with state " << CH->getEHState() << "\n"); - if (I + 1 == E || CH->getEHState() != PoppedCatches[I + 1]->getEHState()) { - int TryLow = CH->getEHState(); - auto Handlers = - makeArrayRef(&PoppedCatches[LastTryLowIdx], I - LastTryLowIdx + 1); - DEBUG(dbgs() << "createTryBlockMapEntry(" << TryLow << ", " << TryHigh); - for (size_t J = 0; J < Handlers.size(); ++J) { - DEBUG(dbgs() << ", "); - print_name(Handlers[J]->getHandlerBlockOrFunc()); - } - DEBUG(dbgs() << ")\n"); - createTryBlockMapEntry(TryLow, TryHigh, Handlers); - LastTryLowIdx = I + 1; - } - } + Worklist.push_back({OriginalPHI->getParent(), OriginalPHI}); - for (CatchHandler *CH : PoppedCatches) { - if (auto *F = dyn_cast<Function>(CH->getHandlerBlockOrFunc())) { - if (FuncInfo.LastInvokeVisited[F]) { - DEBUG(dbgs() << "Assigning base state " << NextState << " to "); - print_name(F); - DEBUG(dbgs() << '\n'); - FuncInfo.HandlerBaseState[F] = NextState; - DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() - << ", null)\n"); - createUnwindMapEntry(currentEHNumber(), nullptr); - ++NextState; - calculateStateNumbers(*F); + while (!Worklist.empty()) { + BasicBlock *EHBlock; + Value *InVal; + std::tie(EHBlock, InVal) = Worklist.pop_back_val(); + + PHINode *PN = dyn_cast<PHINode>(InVal); + if (PN && PN->getParent() == EHBlock) { + // The value is defined by another PHI we need to remove, with no room to + // insert a store after the PHI, so each predecessor needs to store its + // incoming value. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) { + Value *PredVal = PN->getIncomingValue(i); + + // Undef can safely be skipped. + if (isa<UndefValue>(PredVal)) + continue; + + insertPHIStore(PN->getIncomingBlock(i), PredVal, SpillSlot, Worklist); } - else { - DEBUG(dbgs() << "Deferring handling of "); - print_name(F); - DEBUG(dbgs() << " until last invoke visited.\n"); + } else { + // We need to store InVal, which dominates EHBlock, but can't put a store + // in EHBlock, so need to put stores in each predecessor. + for (BasicBlock *PredBlock : predecessors(EHBlock)) { + insertPHIStore(PredBlock, InVal, SpillSlot, Worklist); } } - delete CH; } } -void WinEHNumbering::calculateStateNumbers(const Function &F) { - auto I = VisitedHandlers.insert(&F); - if (!I.second) - return; // We've already visited this handler, don't renumber it. 
+void WinEHPrepare::insertPHIStore( + BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot, + SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist) { - int OldBaseState = CurrentBaseState; - if (FuncInfo.HandlerBaseState.count(&F)) { - CurrentBaseState = FuncInfo.HandlerBaseState[&F]; - } - - size_t SavedHandlerStackSize = HandlerStack.size(); - - DEBUG(dbgs() << "Calculating state numbers for: " << F.getName() << '\n'); - SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; - for (const BasicBlock &BB : F) { - for (const Instruction &I : BB) { - const auto *CI = dyn_cast<CallInst>(&I); - if (!CI || CI->doesNotThrow()) - continue; - processCallSite(None, CI); - } - const auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); - if (!II) - continue; - const LandingPadInst *LPI = II->getLandingPadInst(); - auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode()); - if (!ActionsCall) - continue; - parseEHActions(ActionsCall, ActionList); - if (ActionList.empty()) - continue; - processCallSite(ActionList, II); - ActionList.clear(); - FuncInfo.LandingPadStateMap[LPI] = currentEHNumber(); - DEBUG(dbgs() << "Assigning state " << currentEHNumber() - << " to landing pad at " << LPI->getParent()->getName() - << '\n'); + if (PredBlock->isEHPad() && + isa<TerminatorInst>(PredBlock->getFirstNonPHI())) { + // Pred is unsplittable, so we need to queue it on the worklist. + Worklist.push_back({PredBlock, PredVal}); + return; } - // Pop any actions that were pushed on the stack for this function. - popUnmatchedActions(SavedHandlerStackSize); - - DEBUG(dbgs() << "Assigning max state " << NextState - 1 - << " to " << F.getName() << '\n'); - FuncInfo.CatchHandlerMaxState[&F] = NextState - 1; - - CurrentBaseState = OldBaseState; + // Otherwise, insert the store at the end of the basic block. + new StoreInst(PredVal, SpillSlot, PredBlock->getTerminator()); } -// This function follows the same basic traversal as calculateStateNumbers -// but it is necessary to identify the root landing pad associated -// with each action before we start assigning state numbers. -void WinEHNumbering::findActionRootLPads(const Function &F) { - auto I = VisitedHandlers.insert(&F); - if (!I.second) - return; // We've already visited this handler, don't revisit it. - - SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; - for (const BasicBlock &BB : F) { - const auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); - if (!II) - continue; - const LandingPadInst *LPI = II->getLandingPadInst(); - auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode()); - if (!ActionsCall) - continue; - - assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions); - parseEHActions(ActionsCall, ActionList); - if (ActionList.empty()) - continue; - for (int I = 0, E = ActionList.size(); I < E; ++I) { - if (auto *Handler - = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) { - FuncInfo.LastInvoke[Handler] = II; - // Don't replace the root landing pad if we previously saw this - // handler in a different function. - if (FuncInfo.RootLPad.count(Handler) && - FuncInfo.RootLPad[Handler]->getParent()->getParent() != &F) - continue; - DEBUG(dbgs() << "Setting root lpad for "); - print_name(Handler); - DEBUG(dbgs() << " to " << LPI->getParent()->getName() << '\n'); - FuncInfo.RootLPad[Handler] = LPI; - } +void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot, + DenseMap<BasicBlock *, Value *> &Loads, + Function &F) { + // Lazilly create the spill slot. 
+ if (!SpillSlot) + SpillSlot = new AllocaInst(V->getType(), nullptr, + Twine(V->getName(), ".wineh.spillslot"), + &F.getEntryBlock().front()); + + auto *UsingInst = cast<Instruction>(U.getUser()); + if (auto *UsingPHI = dyn_cast<PHINode>(UsingInst)) { + // If this is a PHI node, we can't insert a load of the value before + // the use. Instead insert the load in the predecessor block + // corresponding to the incoming value. + // + // Note that if there are multiple edges from a basic block to this + // PHI node that we cannot have multiple loads. The problem is that + // the resulting PHI node will have multiple values (from each load) + // coming in from the same block, which is illegal SSA form. + // For this reason, we keep track of and reuse loads we insert. + BasicBlock *IncomingBlock = UsingPHI->getIncomingBlock(U); + if (auto *CatchRet = + dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) { + // Putting a load above a catchret and use on the phi would still leave + // a cross-funclet def/use. We need to split the edge, change the + // catchret to target the new block, and put the load there. + BasicBlock *PHIBlock = UsingInst->getParent(); + BasicBlock *NewBlock = SplitEdge(IncomingBlock, PHIBlock); + // SplitEdge gives us: + // IncomingBlock: + // ... + // br label %NewBlock + // NewBlock: + // catchret label %PHIBlock + // But we need: + // IncomingBlock: + // ... + // catchret label %NewBlock + // NewBlock: + // br label %PHIBlock + // So move the terminators to each others' blocks and swap their + // successors. + BranchInst *Goto = cast<BranchInst>(IncomingBlock->getTerminator()); + Goto->removeFromParent(); + CatchRet->removeFromParent(); + IncomingBlock->getInstList().push_back(CatchRet); + NewBlock->getInstList().push_back(Goto); + Goto->setSuccessor(0, PHIBlock); + CatchRet->setSuccessor(NewBlock); + // Update the color mapping for the newly split edge. + ColorVector &ColorsForPHIBlock = BlockColors[PHIBlock]; + BlockColors[NewBlock] = ColorsForPHIBlock; + for (BasicBlock *FuncletPad : ColorsForPHIBlock) + FuncletBlocks[FuncletPad].push_back(NewBlock); + // Treat the new block as incoming for load insertion. + IncomingBlock = NewBlock; } - // Walk the actions again and look for nested handlers. This has to - // happen after all of the actions have been processed in the current - // function. - for (int I = 0, E = ActionList.size(); I < E; ++I) - if (auto *Handler - = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) - findActionRootLPads(*Handler); - ActionList.clear(); + Value *&Load = Loads[IncomingBlock]; + // Insert the load into the predecessor block + if (!Load) + Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"), + /*Volatile=*/false, IncomingBlock->getTerminator()); + + U.set(Load); + } else { + // Reload right before the old use. + auto *Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"), + /*Volatile=*/false, UsingInst); + U.set(Load); } } -void llvm::calculateWinCXXEHStateNumbers(const Function *ParentFn, - WinEHFuncInfo &FuncInfo) { - // Return if it's already been done. - if (!FuncInfo.LandingPadStateMap.empty()) - return; - - WinEHNumbering Num(FuncInfo); - Num.findActionRootLPads(*ParentFn); - // The VisitedHandlers list is used by both findActionRootLPads and - // calculateStateNumbers, but both functions need to visit all handlers. - Num.VisitedHandlers.clear(); - Num.calculateStateNumbers(*ParentFn); - // Pop everything on the handler stack. 
- // It may be necessary to call this more than once because a handler can
- // be pushed on the stack as a result of clearing the stack.
- while (!Num.HandlerStack.empty())
-   Num.processCallSite(None, ImmutableCallSite());
+void WinEHFuncInfo::addIPToStateRange(const InvokeInst *II,
+                                      MCSymbol *InvokeBegin,
+                                      MCSymbol *InvokeEnd) {
+  assert(InvokeStateMap.count(II) &&
+         "should get invoke with precomputed state");
+  LabelToStateMap[InvokeBegin] = std::make_pair(InvokeStateMap[II], InvokeEnd);
 }
+
+WinEHFuncInfo::WinEHFuncInfo() {}
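Editor's note: the PHI-demotion half of this patch (insertPHILoads, insertPHIStores, and replaceUseWithLoad above) is essentially the classic reg2mem transformation applied around EH pads: spill each incoming value of a PHI to a stack slot in its predecessor, then reload from that slot where the PHI used to be. Below is a minimal, self-contained sketch of that spill/reload pattern using the same LLVM 3.8-era C++ API that appears in this diff. The helper name demotePHIToSpillSlot is illustrative only and not part of the patch; the corner cases the real code handles (EH pads whose first non-PHI instruction is a terminator, catchret edges that must be split and re-targeted, reusing one load per predecessor) are deliberately omitted here.

// Minimal sketch (not part of the patch): demote a PHI node to a stack slot.
// Assumes a plain, splittable CFG; the real WinEHPrepare code must also cope
// with unsplittable EH-pad predecessors and catchret edges.
#include "llvm/ADT/Twine.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static void demotePHIToSpillSlot(PHINode *PN, Function &F) {
  // A slot allocated in the entry block dominates every reload of it.
  auto *Slot = new AllocaInst(PN->getType(), nullptr,
                              Twine(PN->getName(), ".spillslot"),
                              &F.getEntryBlock().front());

  // Store each incoming value at the end of its predecessor block.
  for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
    Value *InVal = PN->getIncomingValue(I);
    if (isa<UndefValue>(InVal))
      continue; // Nothing meaningful to spill for undef inputs.
    new StoreInst(InVal, Slot, PN->getIncomingBlock(I)->getTerminator());
  }

  // Reload once where the PHI used to live and substitute it for the PHI.
  Value *Reload = new LoadInst(Slot, Twine(PN->getName(), ".reload"),
                               &*PN->getParent()->getFirstInsertionPt());
  PN->replaceAllUsesWith(Reload);
  PN->eraseFromParent();
}

Because the slot lives in the parent function's frame, both the parent and the funclets later outlined from it can address the same location, which is what makes formerly cross-funclet PHI uses legal after demotion.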