Diffstat (limited to 'llvm/lib/CodeGen')
97 files changed, 3680 insertions, 1766 deletions
diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 5984063627b0..5c64622c7245 100644 --- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -561,8 +561,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( << ":\n"); std::map<unsigned, BitVector> RenameRegisterMap; unsigned SuperReg = 0; - for (unsigned i = 0, e = Regs.size(); i != e; ++i) { - unsigned Reg = Regs[i]; + for (unsigned Reg : Regs) { if ((SuperReg == 0) || TRI->isSuperRegister(SuperReg, Reg)) SuperReg = Reg; @@ -584,8 +583,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( } // All group registers should be a subreg of SuperReg. - for (unsigned i = 0, e = Regs.size(); i != e; ++i) { - unsigned Reg = Regs[i]; + for (unsigned Reg : Regs) { if (Reg == SuperReg) continue; bool IsSub = TRI->isSubRegister(SuperReg, Reg); // FIXME: remove this once PR18663 has been properly fixed. For now, @@ -646,8 +644,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // For each referenced group register (which must be a SuperReg or // a subregister of SuperReg), find the corresponding subregister // of NewSuperReg and make sure it is free to be renamed. - for (unsigned i = 0, e = Regs.size(); i != e; ++i) { - unsigned Reg = Regs[i]; + for (unsigned Reg : Regs) { unsigned NewReg = 0; if (Reg == SuperReg) { NewReg = NewSuperReg; diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp index 7d8a73e12d3a..7e68e5e22879 100644 --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -712,8 +712,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F, // The manipulations performed when we're looking through an insertvalue or // an extractvalue would happen at the front of the RetPath list, so since // we have to copy it anyway it's more efficient to create a reversed copy. - SmallVector<unsigned, 4> TmpRetPath(RetPath.rbegin(), RetPath.rend()); - SmallVector<unsigned, 4> TmpCallPath(CallPath.rbegin(), CallPath.rend()); + SmallVector<unsigned, 4> TmpRetPath(llvm::reverse(RetPath)); + SmallVector<unsigned, 4> TmpCallPath(llvm::reverse(CallPath)); // Finally, we can check whether the value produced by the tail call at this // index is compatible with the value we return. diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 828cb760b82e..533f20535655 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -180,7 +180,7 @@ Align AsmPrinter::getGVAlignment(const GlobalObject *GV, const DataLayout &DL, Alignment = InAlign; // If the GV has a specified alignment, take it into account. - const MaybeAlign GVAlign(GV->getAlignment()); + const MaybeAlign GVAlign(GV->getAlign()); if (!GVAlign) return Alignment; @@ -288,7 +288,11 @@ bool AsmPrinter::doInitialization(Module &M) { // use the directive, where it would need the same conditionalization // anyway. const Triple &Target = TM.getTargetTriple(); - OutStreamer->emitVersionForTarget(Target, M.getSDKVersion()); + Triple TVT(M.getDarwinTargetVariantTriple()); + OutStreamer->emitVersionForTarget( + Target, M.getSDKVersion(), + M.getDarwinTargetVariantTriple().empty() ? nullptr : &TVT, + M.getDarwinTargetVariantSDKVersion()); // Allow the target to emit any magic that it wants at the start of the file. 
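The Analysis.cpp hunk above (and the PseudoProbePrinter.cpp hunk further down) replaces explicit rbegin()/rend() iterator pairs with llvm::reverse when building a reversed SmallVector copy. A minimal self-contained sketch of the pattern, with the function and variable names invented for illustration:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

// Return a reversed copy of Path. SmallVector's range constructor accepts
// the iterator_range returned by llvm::reverse directly, so no explicit
// rbegin()/rend() pair is needed.
static llvm::SmallVector<unsigned, 4> reversedCopy(llvm::ArrayRef<unsigned> Path) {
  return llvm::SmallVector<unsigned, 4>(llvm::reverse(Path));
}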
emitStartOfAsmFile(M); @@ -1856,6 +1860,17 @@ bool AsmPrinter::doFinalization(Module &M) { continue; OutStreamer->emitSymbolAttribute(getSymbol(&GO), MCSA_WeakReference); } + if (shouldEmitWeakSwiftAsyncExtendedFramePointerFlags()) { + auto SymbolName = "swift_async_extendedFramePointerFlags"; + auto Global = M.getGlobalVariable(SymbolName); + if (!Global) { + auto Int8PtrTy = Type::getInt8PtrTy(M.getContext()); + Global = new GlobalVariable(M, Int8PtrTy, false, + GlobalValue::ExternalWeakLinkage, nullptr, + SymbolName); + OutStreamer->emitSymbolAttribute(getSymbol(Global), MCSA_WeakReference); + } + } } // Print aliases in topological order, that is, for each alias a = b, @@ -2502,6 +2517,9 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(CV)) return getObjFileLowering().lowerDSOLocalEquivalent(Equiv, TM); + if (const NoCFIValue *NC = dyn_cast<NoCFIValue>(CV)) + return MCSymbolRefExpr::create(getSymbol(NC->getGlobalValue()), Ctx); + const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); if (!CE) { llvm_unreachable("Unknown constant value to lower!"); diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 85ff84484ced..d621108408f0 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -611,8 +611,8 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { void CodeViewDebug::beginModule(Module *M) { // If module doesn't have named metadata anchors or COFF debug section // is not available, skip any debug info related stuff. - if (!M->getNamedMetadata("llvm.dbg.cu") || - !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) { + NamedMDNode *CUs = M->getNamedMetadata("llvm.dbg.cu"); + if (!CUs || !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) { Asm = nullptr; return; } @@ -622,7 +622,6 @@ void CodeViewDebug::beginModule(Module *M) { TheCPU = mapArchToCVCPUType(Triple(M->getTargetTriple()).getArch()); // Get the current source language. - NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); const MDNode *Node = *CUs->operands().begin(); const auto *CU = cast<DICompileUnit>(Node); @@ -650,6 +649,7 @@ void CodeViewDebug::endModule() { switchToDebugSectionForSymbol(nullptr); MCSymbol *CompilerInfo = beginCVSubsection(DebugSubsectionKind::Symbols); + emitObjName(); emitCompilerInformation(); endCVSubsection(CompilerInfo); @@ -785,6 +785,29 @@ void CodeViewDebug::emitTypeGlobalHashes() { } } +void CodeViewDebug::emitObjName() { + MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_OBJNAME); + + StringRef PathRef(Asm->TM.Options.ObjectFilenameForDebug); + llvm::SmallString<256> PathStore(PathRef); + + if (PathRef.empty() || PathRef == "-") { + // Don't emit the filename if we're writing to stdout or to /dev/null. 
+ PathRef = {}; + } else { + llvm::sys::path::remove_dots(PathStore, /*remove_dot_dot=*/true); + PathRef = PathStore; + } + + OS.AddComment("Signature"); + OS.emitIntValue(0, 4); + + OS.AddComment("Object name"); + emitNullTerminatedSymbolName(OS, PathRef); + + endSymbolRecord(CompilerEnd); +} + namespace { struct Version { int Part[4]; diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 6f88e15ee8fe..d1fc3cdccb20 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -302,6 +302,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void emitTypeGlobalHashes(); + void emitObjName(); + void emitCompilerInformation(); void emitBuildInfo(); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 0d2736178f0f..9b73f0ab2f05 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -779,7 +779,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); auto AddEntry = [&](const DbgValueLocEntry &Entry, - DIExpressionCursor &Cursor) { + DIExpressionCursor &Cursor) { if (Entry.isLocation()) { if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Entry.getLoc().getReg())) @@ -788,11 +788,19 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, // If there is an expression, emit raw unsigned bytes. DwarfExpr.addUnsignedConstant(Entry.getInt()); } else if (Entry.isConstantFP()) { + // DwarfExpression does not support arguments wider than 64 bits + // (see PR52584). + // TODO: Consider chunking expressions containing overly wide + // arguments into separate pointer-sized fragment expressions. APInt RawBytes = Entry.getConstantFP()->getValueAPF().bitcastToAPInt(); - DwarfExpr.addUnsignedConstant(RawBytes); + if (RawBytes.getBitWidth() > 64) + return false; + DwarfExpr.addUnsignedConstant(RawBytes.getZExtValue()); } else if (Entry.isConstantInt()) { APInt RawBytes = Entry.getConstantInt()->getValue(); - DwarfExpr.addUnsignedConstant(RawBytes); + if (RawBytes.getBitWidth() > 64) + return false; + DwarfExpr.addUnsignedConstant(RawBytes.getZExtValue()); } else if (Entry.isTargetIndexLocation()) { TargetIndexLocation Loc = Entry.getTargetIndexLocation(); // TODO TargetIndexLocation is a target-independent. Currently only the @@ -805,11 +813,12 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, return true; }; - DwarfExpr.addExpression( - std::move(Cursor), - [&](unsigned Idx, DIExpressionCursor &Cursor) -> bool { - return AddEntry(DVal->getLocEntries()[Idx], Cursor); - }); + if (!DwarfExpr.addExpression( + std::move(Cursor), + [&](unsigned Idx, DIExpressionCursor &Cursor) -> bool { + return AddEntry(DVal->getLocEntries()[Idx], Cursor); + })) + return VariableDie; // Now attach the location information to the DIE. 
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 047676d4c11e..48134f1fd774 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1224,17 +1224,15 @@ void DwarfDebug::beginModule(Module *M) { CU.getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV])); } - for (auto *Ty : CUNode->getEnumTypes()) { - // The enum types array by design contains pointers to - // MDNodes rather than DIRefs. Unique them here. + for (auto *Ty : CUNode->getEnumTypes()) CU.getOrCreateTypeDIE(cast<DIType>(Ty)); - } + for (auto *Ty : CUNode->getRetainedTypes()) { // The retained types array by design contains pointers to // MDNodes rather than DIRefs. Unique them here. if (DIType *RT = dyn_cast<DIType>(Ty)) - // There is no point in force-emitting a forward declaration. - CU.getOrCreateTypeDIE(RT); + // There is no point in force-emitting a forward declaration. + CU.getOrCreateTypeDIE(RT); } // Emit imported_modules last so that the relevant context is already // available. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 6409c39e7849..37407c98e75f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -463,15 +463,14 @@ static bool isMemoryLocation(DIExpressionCursor ExprCursor) { return true; } -void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, - unsigned FragmentOffsetInBits) { +void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor) { addExpression(std::move(ExprCursor), [](unsigned Idx, DIExpressionCursor &Cursor) -> bool { llvm_unreachable("unhandled opcode found in expression"); }); } -void DwarfExpression::addExpression( +bool DwarfExpression::addExpression( DIExpressionCursor &&ExprCursor, llvm::function_ref<bool(unsigned, DIExpressionCursor &)> InsertArg) { // Entry values can currently only cover the initial register location, @@ -496,7 +495,7 @@ void DwarfExpression::addExpression( case dwarf::DW_OP_LLVM_arg: if (!InsertArg(Op->getArg(0), ExprCursor)) { LocationKind = Unknown; - return; + return false; } break; case dwarf::DW_OP_LLVM_fragment: { @@ -527,7 +526,7 @@ void DwarfExpression::addExpression( setSubRegisterPiece(0, 0); // Reset the location description kind. LocationKind = Unknown; - return; + return true; } case dwarf::DW_OP_plus_uconst: assert(!isRegisterLocation()); @@ -630,6 +629,8 @@ void DwarfExpression::addExpression( if (isImplicitLocation() && !isParameterValue()) // Turn this into an implicit location description. addStackValue(); + + return true; } /// add masking operations to stencil out a subregister. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 513e9072309e..e605fe2f7d39 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -340,16 +340,17 @@ public: /// create one if necessary. unsigned getOrCreateBaseType(unsigned BitSize, dwarf::TypeKind Encoding); + /// Emit all remaining operations in the DIExpressionCursor. The + /// cursor must not contain any DW_OP_LLVM_arg operations. + void addExpression(DIExpressionCursor &&Expr); + /// Emit all remaining operations in the DIExpressionCursor. 
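The PR52584 guard in the DwarfCompileUnit.cpp hunk above works together with the new bool return of DwarfExpression::addExpression: when an entry carries a constant wider than 64 bits, AddEntry returns false, addExpression propagates the failure, and constructVariableDIEImpl returns the variable DIE without attaching DW_AT_location. A reduced sketch of the width check, where CFP stands in for Entry.getConstantFP():

// Illustrative only: a 128-bit float constant cannot be encoded through
// addUnsignedConstant(uint64_t), so the whole location is dropped.
const llvm::APInt RawBytes = CFP->getValueAPF().bitcastToAPInt();
if (RawBytes.getBitWidth() > 64)   // e.g. fp128 gives a 128-bit image
  return false;                    // caller emits the DIE without a location
DwarfExpr.addUnsignedConstant(RawBytes.getZExtValue());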
- /// - /// \param FragmentOffsetInBits If this is one fragment out of multiple - /// locations, this is the offset of the - /// fragment inside the entire variable. - void addExpression(DIExpressionCursor &&Expr, - unsigned FragmentOffsetInBits = 0); - void - addExpression(DIExpressionCursor &&Expr, - llvm::function_ref<bool(unsigned, DIExpressionCursor &)> InsertArg); + /// DW_OP_LLVM_arg operations are resolved by calling (\p InsertArg). + // + /// \return false if any call to (\p InsertArg) returns false. + bool addExpression( + DIExpressionCursor &&Expr, + llvm::function_ref<bool(unsigned, DIExpressionCursor &)> InsertArg); /// If applicable, emit an empty DW_OP_piece / DW_OP_bit_piece to advance to /// the fragment described by \c Expr. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 976e35905144..6b6d63f14f87 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -536,6 +536,18 @@ void DwarfUnit::addThrownTypes(DIE &Die, DINodeArray ThrownTypes) { } } +void DwarfUnit::addAccess(DIE &Die, DINode::DIFlags Flags) { + if ((Flags & DINode::FlagAccessibility) == DINode::FlagProtected) + addUInt(Die, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_protected); + else if ((Flags & DINode::FlagAccessibility) == DINode::FlagPrivate) + addUInt(Die, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_private); + else if ((Flags & DINode::FlagAccessibility) == DINode::FlagPublic) + addUInt(Die, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_public); +} + DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) { if (!Context || isa<DIFile>(Context)) return &getUnitDie(); @@ -842,13 +854,17 @@ void DwarfUnit::addAnnotation(DIE &Buffer, DINodeArray Annotations) { for (const Metadata *Annotation : Annotations->operands()) { const MDNode *MD = cast<MDNode>(Annotation); const MDString *Name = cast<MDString>(MD->getOperand(0)); - - // Currently, only MDString is supported with btf_decl_tag attribute. - const MDString *Value = cast<MDString>(MD->getOperand(1)); + const auto &Value = MD->getOperand(1); DIE &AnnotationDie = createAndAddDIE(dwarf::DW_TAG_LLVM_annotation, Buffer); addString(AnnotationDie, dwarf::DW_AT_name, Name->getString()); - addString(AnnotationDie, dwarf::DW_AT_const_value, Value->getString()); + if (const auto *Data = dyn_cast<MDString>(Value)) + addString(AnnotationDie, dwarf::DW_AT_const_value, Data->getString()); + else if (const auto *Data = dyn_cast<ConstantAsMetadata>(Value)) + addConstantValue(AnnotationDie, Data->getValue()->getUniqueInteger(), + /*Unsigned=*/true); + else + assert(false && "Unsupported annotation value type"); } } @@ -1007,6 +1023,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { if (CTy->isForwardDecl()) addFlag(Buffer, dwarf::DW_AT_declaration); + // Add accessibility info if available. + addAccess(Buffer, CTy->getFlags()); + // Add source line info if available. 
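The new DwarfUnit::addAccess helper above folds three copies of the same if/else chain (subprogram, member, and static-member DIEs, simplified in the following hunks) into one place. It relies on DINode::FlagAccessibility being the union of FlagPrivate (1), FlagProtected (2) and FlagPublic (3), so masking Flags first and comparing for equality separates the three cases and emits no DW_AT_accessibility when none of them is set. A usage sketch mirroring the call sites below:

// Emits DW_AT_accessibility as DW_FORM_data1 only when an accessibility
// flag is present on the debug-info node.
addAccess(SPDie, SP->getFlags());            // DISubprogram
addAccess(MemberDie, DT->getFlags());        // member DIDerivedType
addAccess(StaticMemberDIE, DT->getFlags());  // static member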
if (!CTy->isForwardDecl()) addSourceLine(Buffer, CTy); @@ -1308,15 +1327,7 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, if (SP->isNoReturn()) addFlag(SPDie, dwarf::DW_AT_noreturn); - if (SP->isProtected()) - addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_protected); - else if (SP->isPrivate()) - addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_private); - else if (SP->isPublic()) - addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_public); + addAccess(SPDie, SP->getFlags()); if (SP->isExplicit()) addFlag(SPDie, dwarf::DW_AT_explicit); @@ -1666,16 +1677,8 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { } } - if (DT->isProtected()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_protected); - else if (DT->isPrivate()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_private); - // Otherwise C++ member and base classes are considered public. - else if (DT->isPublic()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_public); + addAccess(MemberDie, DT->getFlags()); + if (DT->isVirtual()) addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, dwarf::DW_VIRTUALITY_virtual); @@ -1717,15 +1720,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) { // FIXME: We could omit private if the parent is a class_type, and // public if the parent is something else. - if (DT->isProtected()) - addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_protected); - else if (DT->isPrivate()) - addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_private); - else if (DT->isPublic()) - addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_public); + addAccess(StaticMemberDIE, DT->getFlags()); if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT->getConstant())) addConstantValue(StaticMemberDIE, CI, Ty); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 8140279adaef..54b0079dd7ce 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -226,6 +226,9 @@ public: /// Add thrown types. void addThrownTypes(DIE &Die, DINodeArray ThrownTypes); + /// Add the accessibility attribute. + void addAccess(DIE &Die, DINode::DIFlags Flags); + /// Add a new type attribute to the specified entity. 
/// /// This takes and attribute parameter because DW_AT_friend attributes are diff --git a/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index a9fb31d42679..3ade262d9af2 100644 --- a/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -112,16 +112,12 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, EmitCamlGlobal(M, AP, "frametable"); int NumDescriptors = 0; - for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), - IE = Info.funcinfo_end(); - I != IE; ++I) { - GCFunctionInfo &FI = **I; - if (FI.getStrategy().getName() != getStrategy().getName()) + for (std::unique_ptr<GCFunctionInfo> &FI : + llvm::make_range(Info.funcinfo_begin(), Info.funcinfo_end())) { + if (FI->getStrategy().getName() != getStrategy().getName()) // this function is managed by some other GC continue; - for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { - NumDescriptors++; - } + NumDescriptors += FI->size(); } if (NumDescriptors >= 1 << 16) { @@ -131,35 +127,34 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AP.emitInt16(NumDescriptors); AP.emitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); - for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), - IE = Info.funcinfo_end(); - I != IE; ++I) { - GCFunctionInfo &FI = **I; - if (FI.getStrategy().getName() != getStrategy().getName()) + for (std::unique_ptr<GCFunctionInfo> &FI : + llvm::make_range(Info.funcinfo_begin(), Info.funcinfo_end())) { + if (FI->getStrategy().getName() != getStrategy().getName()) // this function is managed by some other GC continue; - uint64_t FrameSize = FI.getFrameSize(); + uint64_t FrameSize = FI->getFrameSize(); if (FrameSize >= 1 << 16) { // Very rude! - report_fatal_error("Function '" + FI.getFunction().getName() + + report_fatal_error("Function '" + FI->getFunction().getName() + "' is too large for the ocaml GC! " "Frame size " + Twine(FrameSize) + ">= 65536.\n" "(" + - Twine(reinterpret_cast<uintptr_t>(&FI)) + ")"); + Twine(reinterpret_cast<uintptr_t>(FI.get())) + ")"); } AP.OutStreamer->AddComment("live roots for " + - Twine(FI.getFunction().getName())); + Twine(FI->getFunction().getName())); AP.OutStreamer->AddBlankLine(); - for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { - size_t LiveCount = FI.live_size(J); + for (GCFunctionInfo::iterator J = FI->begin(), JE = FI->end(); J != JE; + ++J) { + size_t LiveCount = FI->live_size(J); if (LiveCount >= 1 << 16) { // Very rude! - report_fatal_error("Function '" + FI.getFunction().getName() + + report_fatal_error("Function '" + FI->getFunction().getName() + "' is too large for the ocaml GC! " "Live root count " + Twine(LiveCount) + " >= 65536."); @@ -169,8 +164,8 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AP.emitInt16(FrameSize); AP.emitInt16(LiveCount); - for (GCFunctionInfo::live_iterator K = FI.live_begin(J), - KE = FI.live_end(J); + for (GCFunctionInfo::live_iterator K = FI->live_begin(J), + KE = FI->live_end(J); K != KE; ++K) { if (K->StackOffset >= 1 << 16) { // Very rude! 
diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp index 9e6f1a537de3..bab187f46535 100644 --- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp @@ -47,7 +47,6 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index, InlinedAt = InlinedAt->getInlinedAt(); } - SmallVector<InlineSite, 8> InlineStack(ReversedInlineStack.rbegin(), - ReversedInlineStack.rend()); + SmallVector<InlineSite, 8> InlineStack(llvm::reverse(ReversedInlineStack)); Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack); } diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 64dadc82b48b..0ff67f7ca00a 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -1125,8 +1125,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // If this is a large problem, avoid visiting the same basic blocks multiple // times. if (MergePotentials.size() == TailMergeThreshold) - for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) - TriedMerging.insert(MergePotentials[i].getBlock()); + for (MergePotentialsElt &Elt : MergePotentials) + TriedMerging.insert(Elt.getBlock()); if (MergePotentials.size() >= 2) MadeChange |= TryTailMergeBlocks(IBB, PredBB, MinCommonTailLength); diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 863a0e1e0b56..5f9982cd155d 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -15,13 +15,13 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/CodeGen/StackMaps.h" #include <cassert> #include <tuple> @@ -35,7 +35,7 @@ void VirtRegAuxInfo::calculateSpillWeightsAndHints() { MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned Reg = Register::index2VirtReg(I); + Register Reg = Register::index2VirtReg(I); if (MRI.reg_nodbg_empty(Reg)) continue; calculateSpillWeightAndHint(LIS.getInterval(Reg)); @@ -64,14 +64,14 @@ static Register copyHint(const MachineInstr *MI, unsigned Reg, if (Register::isVirtualRegister(HReg)) return Sub == HSub ? HReg : Register(); - const TargetRegisterClass *rc = MRI.getRegClass(Reg); + const TargetRegisterClass *RC = MRI.getRegClass(Reg); MCRegister CopiedPReg = HSub ? TRI.getSubReg(HReg, HSub) : HReg.asMCReg(); - if (rc->contains(CopiedPReg)) + if (RC->contains(CopiedPReg)) return CopiedPReg; // Check if reg:sub matches so that a super register could be hinted. 
if (Sub) - return TRI.getMatchingSuperReg(CopiedPReg, Sub, rc); + return TRI.getMatchingSuperReg(CopiedPReg, Sub, RC); return 0; } @@ -80,8 +80,8 @@ static Register copyHint(const MachineInstr *MI, unsigned Reg, static bool isRematerializable(const LiveInterval &LI, const LiveIntervals &LIS, const VirtRegMap &VRM, const TargetInstrInfo &TII) { - unsigned Reg = LI.reg(); - unsigned Original = VRM.getOriginal(Reg); + Register Reg = LI.reg(); + Register Original = VRM.getOriginal(Reg); for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); I != E; ++I) { const VNInfo *VNI = *I; @@ -183,8 +183,8 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, bool ShouldUpdateLI = !IsLocalSplitArtifact; if (IsLocalSplitArtifact) { - MachineBasicBlock *localMBB = LIS.getMBBFromIndex(*End); - assert(localMBB == LIS.getMBBFromIndex(*Start) && + MachineBasicBlock *LocalMBB = LIS.getMBBFromIndex(*End); + assert(LocalMBB == LIS.getMBBFromIndex(*Start) && "start and end are expected to be in the same basic block"); // Local split artifact will have 2 additional copy instructions and they @@ -192,8 +192,8 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, // localLI = COPY other // ... // other = COPY localLI - TotalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, localMBB); - TotalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, localMBB); + TotalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, LocalMBB); + TotalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, LocalMBB); NumInstr += 2; } diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index bbdd8aab502e..7c236a9785d8 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -68,6 +68,8 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineCSEPass(Registry); initializeMachineCombinerPass(Registry); initializeMachineCopyPropagationPass(Registry); + initializeMachineCycleInfoPrinterPassPass(Registry); + initializeMachineCycleInfoWrapperPassPass(Registry); initializeMachineDominatorTreePass(Registry); initializeMachineFunctionPrinterPassPass(Registry); initializeMachineLICMPass(Registry); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index ac4180c4c3ab..747f4e4fdecc 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -4831,9 +4831,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, *CI); - for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { - TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; - + for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) { // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(OpInfo, SDValue()); @@ -5617,9 +5615,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { TargetLowering::AsmOperandInfoVector TargetConstraints = TLI->ParseConstraints(*DL, TRI, *CS); unsigned ArgNo = 0; - for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { - TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; - + for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) { // Compute the constraint code and ConstraintType to use. 
TLI->ComputeConstraintToUse(OpInfo, SDValue()); @@ -6856,8 +6852,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { // Use reverse iterator because later select may use the value of the // earlier select, and we need to propagate value through earlier select // to get the PHI operand. - for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) { - SelectInst *SI = *It; + for (SelectInst *SI : llvm::reverse(ASI)) { // The select itself is replaced with a PHI Node. PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); PN->takeName(SI); diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 4e98d49206b5..901409ea9f8f 100644 --- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -405,8 +405,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin, const TargetRegisterClass *RC, SmallVectorImpl<unsigned> &Forbid) { ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC); - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned NewReg = Order[i]; + for (unsigned NewReg : Order) { // Don't replace a register with itself. if (NewReg == AntiDepReg) continue; // Don't replace a register with one that was recently used to repair diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index 0bb186a02416..5579152f1ce0 100644 --- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -142,9 +142,9 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) { if (isDead(&MI)) { LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << MI); // It is possible that some DBG_VALUE instructions refer to this - // instruction. They get marked as undef and will be deleted - // in the live debug variable analysis. - MI.eraseFromParentAndMarkDBGValuesForRemoval(); + // instruction. They will be deleted in the live debug variable + // analysis. + MI.eraseFromParent(); AnyChanges = true; ++NumDeletes; continue; diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp index 90883212a275..0b5469b02637 100644 --- a/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -210,9 +210,9 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { // Check all instructions, except the terminators. It is assumed that // terminators never have side effects or define any used register values. - for (MachineBasicBlock::iterator I = MBB->begin(), - E = MBB->getFirstTerminator(); I != E; ++I) { - if (I->isDebugInstr()) + for (MachineInstr &MI : + llvm::make_range(MBB->begin(), MBB->getFirstTerminator())) { + if (MI.isDebugInstr()) continue; if (++InstrCount > BlockInstrLimit && !Stress) { @@ -222,28 +222,28 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { } // There shouldn't normally be any phis in a single-predecessor block. - if (I->isPHI()) { - LLVM_DEBUG(dbgs() << "Can't hoist: " << *I); + if (MI.isPHI()) { + LLVM_DEBUG(dbgs() << "Can't hoist: " << MI); return false; } // Don't speculate loads. Note that it may be possible and desirable to // speculate GOT or constant pool loads that are guaranteed not to trap, // but we don't support that for now. - if (I->mayLoad()) { - LLVM_DEBUG(dbgs() << "Won't speculate load: " << *I); + if (MI.mayLoad()) { + LLVM_DEBUG(dbgs() << "Won't speculate load: " << MI); return false; } // We never speculate stores, so an AA pointer isn't necessary. 
bool DontMoveAcrossStore = true; - if (!I->isSafeToMove(nullptr, DontMoveAcrossStore)) { - LLVM_DEBUG(dbgs() << "Can't speculate: " << *I); + if (!MI.isSafeToMove(nullptr, DontMoveAcrossStore)) { + LLVM_DEBUG(dbgs() << "Can't speculate: " << MI); return false; } // Check for any dependencies on Head instructions. - if (!InstrDependenciesAllowIfConv(&(*I))) + if (!InstrDependenciesAllowIfConv(&MI)) return false; } return true; diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 17094a8e44f8..d061664e8c5d 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -256,7 +256,7 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs, LLT PartLLT = MRI.getType(SrcRegs[0]); // Deal with v3s16 split into v2s16 - LLT LCMTy = getLCMType(LLTy, PartLLT); + LLT LCMTy = getCoverTy(LLTy, PartLLT); if (LCMTy == LLTy) { // Common case where no padding is needed. assert(DstRegs.size() == 1); @@ -267,21 +267,9 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs, // widening the original value. Register UnmergeSrcReg; if (LCMTy != PartLLT) { - // e.g. A <3 x s16> value was split to <2 x s16> - // %register_value0:_(<2 x s16>) - // %register_value1:_(<2 x s16>) - // %undef:_(<2 x s16>) = G_IMPLICIT_DEF - // %concat:_<6 x s16>) = G_CONCAT_VECTORS %reg_value0, %reg_value1, %undef - // %dst_reg:_(<3 x s16>), %dead:_(<3 x s16>) = G_UNMERGE_VALUES %concat - const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits(); - Register Undef = B.buildUndef(PartLLT).getReg(0); - - // Build vector of undefs. - SmallVector<Register, 8> WidenedSrcs(NumWide, Undef); - - // Replace the first sources with the real registers. - std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin()); - UnmergeSrcReg = B.buildConcatVectors(LCMTy, WidenedSrcs).getReg(0); + assert(DstRegs.size() == 1); + return B.buildDeleteTrailingVectorElements(DstRegs[0], + B.buildMerge(LCMTy, SrcRegs)); } else { // We don't need to widen anything if we're extracting a scalar which was // promoted to a vector e.g. s8 -> v4s8 -> s8 @@ -298,6 +286,8 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs, for (int I = DstRegs.size(); I != NumDst; ++I) PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy); + if (PadDstRegs.size() == 1) + return B.buildDeleteTrailingVectorElements(DstRegs[0], UnmergeSrcReg); return B.buildUnmerge(PadDstRegs, UnmergeSrcReg); } @@ -485,7 +475,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs, MachineRegisterInfo &MRI = *B.getMRI(); LLT DstTy = MRI.getType(DstRegs[0]); - LLT LCMTy = getLCMType(SrcTy, PartTy); + LLT LCMTy = getCoverTy(SrcTy, PartTy); const unsigned DstSize = DstTy.getSizeInBits(); const unsigned SrcSize = SrcTy.getSizeInBits(); @@ -493,7 +483,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs, Register UnmergeSrc = SrcReg; - if (CoveringSize != SrcSize) { + if (!LCMTy.isVector() && CoveringSize != SrcSize) { // For scalars, it's common to be able to use a simple extension. if (SrcTy.isScalar() && DstTy.isScalar()) { CoveringSize = alignTo(SrcSize, DstSize); @@ -510,14 +500,10 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs, } } - // Unmerge to the original registers and pad with dead defs. 
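The CallLowering.cpp hunks above replace getLCMType and the hand-rolled concat/unmerge padding with getCoverTy plus the newer buildPadVectorWithUndefElements / buildDeleteTrailingVectorElements helpers. Roughly, and only to illustrate the intent rather than the exact MIR the builders emit, the <3 x s16> case split over <2 x s16> registers pads and trims as follows:

// Padding a <3 x s16> value up to its <4 x s16> cover type:
//   %e0:_(s16), %e1:_(s16), %e2:_(s16) = G_UNMERGE_VALUES %src(<3 x s16>)
//   %undef:_(s16) = G_IMPLICIT_DEF
//   %padded:_(<4 x s16>) = G_BUILD_VECTOR %e0, %e1, %e2, %undef
// ...and trimming a <4 x s16> result back down on the return path:
//   %r0:_(s16), %r1:_(s16), %r2:_(s16), %dead:_(s16) = G_UNMERGE_VALUES %wide(<4 x s16>)
//   %dst:_(<3 x s16>) = G_BUILD_VECTOR %r0, %r1, %r2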
- SmallVector<Register, 8> UnmergeResults(DstRegs.begin(), DstRegs.end()); - for (unsigned Size = DstSize * DstRegs.size(); Size != CoveringSize; - Size += DstSize) { - UnmergeResults.push_back(MRI.createGenericVirtualRegister(DstTy)); - } + if (LCMTy.isVector() && CoveringSize != SrcSize) + UnmergeSrc = B.buildPadVectorWithUndefElements(LCMTy, SrcReg).getReg(0); - B.buildUnmerge(UnmergeResults, UnmergeSrc); + B.buildUnmerge(DstRegs, UnmergeSrc); } bool CallLowering::determineAndHandleAssignments( diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp index 381c6df5c97a..dd1ef74e8ad0 100644 --- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp @@ -135,7 +135,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF, // Erase dead insts before even adding to the list. if (isTriviallyDead(CurMI, *MRI)) { LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n"); - CurMI.eraseFromParentAndMarkDBGValuesForRemoval(); + CurMI.eraseFromParent(); continue; } WorkList.deferred_insert(&CurMI); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 755b3b844570..f7a634dad61a 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1551,8 +1551,8 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2}); // These were one use so it's safe to remove them. - MatchInfo.Shift2->eraseFromParentAndMarkDBGValuesForRemoval(); - MatchInfo.Logic->eraseFromParentAndMarkDBGValuesForRemoval(); + MatchInfo.Shift2->eraseFromParent(); + MatchInfo.Logic->eraseFromParent(); MI.eraseFromParent(); } diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 87cc60d51bc2..6d415c9c7f90 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -338,9 +338,10 @@ bool IRTranslator::translateCompare(const User &U, MIRBuilder.buildCopy( Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType()))); else { - assert(CI && "Instruction should be CmpInst"); - MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, - MachineInstr::copyFlagsFromInstruction(*CI)); + uint16_t Flags = 0; + if (CI) + Flags = MachineInstr::copyFlagsFromInstruction(*CI); + MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags); } return true; @@ -3502,7 +3503,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { // Get rid of the now empty basic block. EntryBB->removeSuccessor(&NewEntryBB); MF->remove(EntryBB); - MF->DeleteMachineBasicBlock(EntryBB); + MF->deleteMachineBasicBlock(EntryBB); assert(&MF->front() == &NewEntryBB && "New entry wasn't next in the list of basic block!"); diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 9b2692486384..b10c9272a508 100644 --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -163,7 +163,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { // If so, erase it. 
if (isTriviallyDead(MI, MRI)) { LLVM_DEBUG(dbgs() << "Is dead; erasing.\n"); - MI.eraseFromParentAndMarkDBGValuesForRemoval(); + MI.eraseFromParent(); continue; } @@ -255,8 +255,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { MachineInstr *MI = nullptr; if (!MRI.def_empty(VReg)) MI = &*MRI.def_instr_begin(VReg); - else if (!MRI.use_empty(VReg)) + else if (!MRI.use_empty(VReg)) { MI = &*MRI.use_instr_begin(VReg); + // Debug value instruction is permitted to use undefined vregs. + if (MI->isDebugValue()) + continue; + } if (!MI) continue; diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e09cd26eb0c1..e8a8efd5dad4 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -176,16 +176,18 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy, return true; } + // Perform irregular split. Leftover is last element of RegPieces. if (MainTy.isVector()) { - unsigned EltSize = MainTy.getScalarSizeInBits(); - if (LeftoverSize % EltSize != 0) - return false; - LeftoverTy = LLT::scalarOrVector( - ElementCount::getFixed(LeftoverSize / EltSize), EltSize); - } else { - LeftoverTy = LLT::scalar(LeftoverSize); + SmallVector<Register, 8> RegPieces; + extractVectorParts(Reg, MainTy.getNumElements(), RegPieces); + for (unsigned i = 0; i < RegPieces.size() - 1; ++i) + VRegs.push_back(RegPieces[i]); + LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]); + LeftoverTy = MRI.getType(LeftoverRegs[0]); + return true; } + LeftoverTy = LLT::scalar(LeftoverSize); // For irregular sizes, extract the individual parts. for (unsigned I = 0; I != NumParts; ++I) { Register NewReg = MRI.createGenericVirtualRegister(MainTy); @@ -203,6 +205,44 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy, return true; } +void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts, + SmallVectorImpl<Register> &VRegs) { + LLT RegTy = MRI.getType(Reg); + assert(RegTy.isVector() && "Expected a vector type"); + + LLT EltTy = RegTy.getElementType(); + LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy); + unsigned RegNumElts = RegTy.getNumElements(); + unsigned LeftoverNumElts = RegNumElts % NumElts; + unsigned NumNarrowTyPieces = RegNumElts / NumElts; + + // Perfect split without leftover + if (LeftoverNumElts == 0) + return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs); + + // Irregular split. Provide direct access to all elements for artifact + // combiner using unmerge to elements. Then build vectors with NumElts + // elements. Remaining element(s) will be (used to build vector) Leftover. + SmallVector<Register, 8> Elts; + extractParts(Reg, EltTy, RegNumElts, Elts); + + unsigned Offset = 0; + // Requested sub-vectors of NarrowTy. + for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) { + ArrayRef<Register> Pieces(&Elts[Offset], NumElts); + VRegs.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0)); + } + + // Leftover element(s). 
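// For illustration (the types and element counts are example values, not
// taken from the patch): splitting a <7 x s8> register with NumElts == 4
// yields one requested <4 x s8> piece plus a <3 x s8> leftover built from
// the remaining elements:
//   %e0:_(s8), ..., %e6:_(s8) = G_UNMERGE_VALUES %reg(<7 x s8>)
//   %piece:_(<4 x s8>)        = G_BUILD_VECTOR %e0, %e1, %e2, %e3
//   %leftover:_(<3 x s8>)     = G_BUILD_VECTOR %e4, %e5, %e6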
+ if (LeftoverNumElts == 1) { + VRegs.push_back(Elts[Offset]); + } else { + LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy); + ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts); + VRegs.push_back(MIRBuilder.buildMerge(LeftoverTy, Pieces).getReg(0)); + } +} + void LegalizerHelper::insertParts(Register DstReg, LLT ResultTy, LLT PartTy, ArrayRef<Register> PartRegs, @@ -223,6 +263,15 @@ void LegalizerHelper::insertParts(Register DstReg, return; } + // Merge sub-vectors with different number of elements and insert into DstReg. + if (ResultTy.isVector()) { + assert(LeftoverRegs.size() == 1 && "Expected one leftover register"); + SmallVector<Register, 8> AllRegs; + for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs)) + AllRegs.push_back(Reg); + return mergeMixedSubvectors(DstReg, AllRegs); + } + SmallVector<Register> GCDRegs; LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy); for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs)) @@ -231,6 +280,30 @@ void LegalizerHelper::insertParts(Register DstReg, buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs); } +void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts, + Register Reg) { + LLT Ty = MRI.getType(Reg); + SmallVector<Register, 8> RegElts; + extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts); + Elts.append(RegElts); +} + +/// Merge \p PartRegs with different types into \p DstReg. +void LegalizerHelper::mergeMixedSubvectors(Register DstReg, + ArrayRef<Register> PartRegs) { + SmallVector<Register, 8> AllElts; + for (unsigned i = 0; i < PartRegs.size() - 1; ++i) + appendVectorElts(AllElts, PartRegs[i]); + + Register Leftover = PartRegs[PartRegs.size() - 1]; + if (MRI.getType(Leftover).isScalar()) + AllElts.push_back(Leftover); + else + appendVectorElts(AllElts, Leftover); + + MIRBuilder.buildMerge(DstReg, AllElts); +} + /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs. 
static void getUnmergeResults(SmallVectorImpl<Register> &Regs, const MachineInstr &MI) { @@ -916,8 +989,26 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } - case TargetOpcode::G_FREEZE: - return reduceOperationWidth(MI, TypeIdx, NarrowTy); + case TargetOpcode::G_FREEZE: { + if (TypeIdx != 0) + return UnableToLegalize; + + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + // Should widen scalar first + if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0) + return UnableToLegalize; + + auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg()); + SmallVector<Register, 8> Parts; + for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) { + Parts.push_back( + MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0)); + } + + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Parts); + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_ADD: case TargetOpcode::G_SUB: case TargetOpcode::G_SADDO: @@ -1372,37 +1463,17 @@ void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx) { MachineOperand &MO = MI.getOperand(OpIdx); MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); - MO.setReg(widenWithUnmerge(WideTy, MO.getReg())); + Register Dst = MO.getReg(); + Register DstExt = MRI.createGenericVirtualRegister(WideTy); + MO.setReg(DstExt); + MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt); } void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx) { MachineOperand &MO = MI.getOperand(OpIdx); - - LLT OldTy = MRI.getType(MO.getReg()); - unsigned OldElts = OldTy.getNumElements(); - unsigned NewElts = MoreTy.getNumElements(); - - unsigned NumParts = NewElts / OldElts; - - // Use concat_vectors if the result is a multiple of the number of elements. - if (NumParts * OldElts == NewElts) { - SmallVector<Register, 8> Parts; - Parts.push_back(MO.getReg()); - - Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0); - for (unsigned I = 1; I != NumParts; ++I) - Parts.push_back(ImpDef); - - auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts); - MO.setReg(Concat.getReg(0)); - return; - } - - Register MoreReg = MRI.createGenericVirtualRegister(MoreTy); - Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0); - MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0); - MO.setReg(MoreReg); + SmallVector<Register, 8> Regs; + MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0)); } void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) { @@ -3558,20 +3629,83 @@ Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy, return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0); } -LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( - MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { - Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT LCMTy = getLCMType(DstTy, NarrowTy); +#ifndef NDEBUG +/// Check that all vector operands have same number of elements. Other operands +/// should be listed in NonVecOp. 
+static bool hasSameNumEltsOnAllVectorOperands( + GenericMachineInstr &MI, MachineRegisterInfo &MRI, + std::initializer_list<unsigned> NonVecOpIndices) { + if (MI.getNumMemOperands() != 0) + return false; - unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits(); + LLT VecTy = MRI.getType(MI.getReg(0)); + if (!VecTy.isVector()) + return false; + unsigned NumElts = VecTy.getNumElements(); - auto NewUndef = MIRBuilder.buildUndef(NarrowTy); - SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0)); + for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) { + MachineOperand &Op = MI.getOperand(OpIdx); + if (!Op.isReg()) { + if (!is_contained(NonVecOpIndices, OpIdx)) + return false; + continue; + } - buildWidenedRemergeToDst(DstReg, LCMTy, Parts); - MI.eraseFromParent(); - return Legalized; + LLT Ty = MRI.getType(Op.getReg()); + if (!Ty.isVector()) { + if (!is_contained(NonVecOpIndices, OpIdx)) + return false; + is_contained(NonVecOpIndices, OpIdx); + continue; + } + + if (Ty.getNumElements() != NumElts) + return false; + } + + return true; +} +#endif + +/// Fill \p DstOps with DstOps that have same number of elements combined as +/// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are +/// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple +/// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements. +static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty, + unsigned NumElts) { + LLT LeftoverTy; + assert(Ty.isVector() && "Expected vector type"); + LLT EltTy = Ty.getElementType(); + LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy); + int NumParts, NumLeftover; + std::tie(NumParts, NumLeftover) = + getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy); + + assert(NumParts > 0 && "Error in getNarrowTypeBreakDown"); + for (int i = 0; i < NumParts; ++i) { + DstOps.push_back(NarrowTy); + } + + if (LeftoverTy.isValid()) { + assert(NumLeftover == 1 && "expected exactly one leftover"); + DstOps.push_back(LeftoverTy); + } +} + +/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps +/// made from \p Op depending on operand type. +static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N, + MachineOperand &Op) { + for (unsigned i = 0; i < N; ++i) { + if (Op.isReg()) + Ops.push_back(Op.getReg()); + else if (Op.isImm()) + Ops.push_back(Op.getImm()); + else if (Op.isPredicate()) + Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate())); + else + llvm_unreachable("Unsupported type"); + } } // Handle splitting vector operations which need to have the same number of @@ -3588,335 +3722,116 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( // s64 = G_SHL s64, s32 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorMultiEltType( - MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) { - if (TypeIdx != 0) - return UnableToLegalize; - - const LLT NarrowTy0 = NarrowTyArg; - const Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT LeftoverTy0; - - // All of the operands need to have the same number of elements, so if we can - // determine a type breakdown for the result type, we can for all of the - // source types. 
- int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first; - if (NumParts < 0) - return UnableToLegalize; - - SmallVector<MachineInstrBuilder, 4> NewInsts; - - SmallVector<Register, 4> DstRegs, LeftoverDstRegs; - SmallVector<Register, 4> PartRegs, LeftoverRegs; - - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { - Register SrcReg = MI.getOperand(I).getReg(); - LLT SrcTyI = MRI.getType(SrcReg); - const auto NewEC = NarrowTy0.isVector() ? NarrowTy0.getElementCount() - : ElementCount::getFixed(1); - LLT NarrowTyI = LLT::scalarOrVector(NewEC, SrcTyI.getScalarType()); - LLT LeftoverTyI; - - // Split this operand into the requested typed registers, and any leftover - // required to reproduce the original type. - if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs, - LeftoverRegs)) - return UnableToLegalize; - - if (I == 1) { - // For the first operand, create an instruction for each part and setup - // the result. - for (Register PartReg : PartRegs) { - Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0); - NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode()) - .addDef(PartDstReg) - .addUse(PartReg)); - DstRegs.push_back(PartDstReg); - } - - for (Register LeftoverReg : LeftoverRegs) { - Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0); - NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode()) - .addDef(PartDstReg) - .addUse(LeftoverReg)); - LeftoverDstRegs.push_back(PartDstReg); - } + GenericMachineInstr &MI, unsigned NumElts, + std::initializer_list<unsigned> NonVecOpIndices) { + assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) && + "Non-compatible opcode or not specified non-vector operands"); + unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements(); + + unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs(); + unsigned NumDefs = MI.getNumDefs(); + + // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output. + // Build instructions with DstOps to use instruction found by CSE directly. + // CSE copies found instruction into given vreg when building with vreg dest. + SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs); + // Output registers will be taken from created instructions. + SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs); + for (unsigned i = 0; i < NumDefs; ++i) { + makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts); + } + + // Split vector input operands into sub-vectors with NumElts elts + Leftover. + // Operands listed in NonVecOpIndices will be used as is without splitting; + // examples: compare predicate in icmp and fcmp (op 1), vector select with i1 + // scalar condition (op 1), immediate in sext_inreg (op 2). + SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs); + for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands(); + ++UseIdx, ++UseNo) { + if (is_contained(NonVecOpIndices, UseIdx)) { + broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(), + MI.getOperand(UseIdx)); } else { - assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size()); - - // Add the newly created operand splits to the existing instructions. The - // odd-sized pieces are ordered after the requested NarrowTyArg sized - // pieces. 
- unsigned InstCount = 0; - for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J) - NewInsts[InstCount++].addUse(PartRegs[J]); - for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J) - NewInsts[InstCount++].addUse(LeftoverRegs[J]); + SmallVector<Register, 8> SplitPieces; + extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces); + for (auto Reg : SplitPieces) + InputOpsPieces[UseNo].push_back(Reg); } - - PartRegs.clear(); - LeftoverRegs.clear(); } - // Insert the newly built operations and rebuild the result register. - for (auto &MIB : NewInsts) - MIRBuilder.insertInstr(MIB); + unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0; - insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs); + // Take i-th piece of each input operand split and build sub-vector/scalar + // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s). + for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) { + SmallVector<DstOp, 2> Defs; + for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo) + Defs.push_back(OutputOpsPieces[DstNo][i]); - MI.eraseFromParent(); - return Legalized; -} + SmallVector<SrcOp, 3> Uses; + for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo) + Uses.push_back(InputOpsPieces[InputNo][i]); -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - if (TypeIdx != 0) - return UnableToLegalize; - - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(SrcReg); - - LLT NarrowTy0 = NarrowTy; - LLT NarrowTy1; - unsigned NumParts; - - if (NarrowTy.isVector()) { - // Uneven breakdown not handled. - NumParts = DstTy.getNumElements() / NarrowTy.getNumElements(); - if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements()) - return UnableToLegalize; - - NarrowTy1 = LLT::vector(NarrowTy.getElementCount(), SrcTy.getElementType()); - } else { - NumParts = DstTy.getNumElements(); - NarrowTy1 = SrcTy.getElementType(); + auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags()); + for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo) + OutputRegs[DstNo].push_back(I.getReg(DstNo)); } - SmallVector<Register, 4> SrcRegs, DstRegs; - extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs); - - for (unsigned I = 0; I < NumParts; ++I) { - Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); - MachineInstr *NewInst = - MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]}); - - NewInst->setFlags(MI.getFlags()); - DstRegs.push_back(DstReg); + // Merge small outputs into MI's output for each def operand. + if (NumLeftovers) { + for (unsigned i = 0; i < NumDefs; ++i) + mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]); + } else { + for (unsigned i = 0; i < NumDefs; ++i) + MIRBuilder.buildMerge(MI.getReg(i), OutputRegs[i]); } - if (NarrowTy.isVector()) - MIRBuilder.buildConcatVectors(DstReg, DstRegs); - else - MIRBuilder.buildBuildVector(DstReg, DstRegs); - MI.eraseFromParent(); return Legalized; } LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - Register DstReg = MI.getOperand(0).getReg(); - Register Src0Reg = MI.getOperand(2).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(Src0Reg); - - unsigned NumParts; - LLT NarrowTy0, NarrowTy1; - - if (TypeIdx == 0) { - unsigned NewElts = NarrowTy.isVector() ? 
NarrowTy.getNumElements() : 1; - unsigned OldElts = DstTy.getNumElements(); - - NarrowTy0 = NarrowTy; - NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements(); - NarrowTy1 = NarrowTy.isVector() ? LLT::vector(NarrowTy.getElementCount(), - SrcTy.getScalarSizeInBits()) - : SrcTy.getElementType(); - - } else { - unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; - unsigned OldElts = SrcTy.getNumElements(); - - NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : - NarrowTy.getNumElements(); - NarrowTy0 = - LLT::vector(NarrowTy.getElementCount(), DstTy.getScalarSizeInBits()); - NarrowTy1 = NarrowTy; +LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI, + unsigned NumElts) { + unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements(); + + unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs(); + unsigned NumDefs = MI.getNumDefs(); + + SmallVector<DstOp, 8> OutputOpsPieces; + SmallVector<Register, 8> OutputRegs; + makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts); + + // Instructions that perform register split will be inserted in basic block + // where register is defined (basic block is in the next operand). + SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2); + for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands(); + UseIdx += 2, ++UseNo) { + MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB(); + MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); + extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]); } - // FIXME: Don't know how to handle the situation where the small vectors - // aren't all the same size yet. - if (NarrowTy1.isVector() && - NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements()) - return UnableToLegalize; - - CmpInst::Predicate Pred - = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); - - SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs; - extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs); - extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs); + // Build PHIs with fewer elements. + unsigned NumLeftovers = OrigNumElts % NumElts ? 
1 : 0; + MIRBuilder.setInsertPt(*MI.getParent(), MI); + for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) { + auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI); + Phi.addDef( + MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI))); + OutputRegs.push_back(Phi.getReg(0)); - for (unsigned I = 0; I < NumParts; ++I) { - Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); - DstRegs.push_back(DstReg); - - if (MI.getOpcode() == TargetOpcode::G_ICMP) - MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]); - else { - MachineInstr *NewCmp - = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]); - NewCmp->setFlags(MI.getFlags()); + for (unsigned j = 0; j < NumInputs / 2; ++j) { + Phi.addUse(InputOpsPieces[j][i]); + Phi.add(MI.getOperand(1 + j * 2 + 1)); } } - if (NarrowTy1.isVector()) - MIRBuilder.buildConcatVectors(DstReg, DstRegs); - else - MIRBuilder.buildBuildVector(DstReg, DstRegs); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - Register DstReg = MI.getOperand(0).getReg(); - Register CondReg = MI.getOperand(1).getReg(); - - unsigned NumParts = 0; - LLT NarrowTy0, NarrowTy1; - - LLT DstTy = MRI.getType(DstReg); - LLT CondTy = MRI.getType(CondReg); - unsigned Size = DstTy.getSizeInBits(); - - assert(TypeIdx == 0 || CondTy.isVector()); - - if (TypeIdx == 0) { - NarrowTy0 = NarrowTy; - NarrowTy1 = CondTy; - - unsigned NarrowSize = NarrowTy0.getSizeInBits(); - // FIXME: Don't know how to handle the situation where the small vectors - // aren't all the same size yet. - if (Size % NarrowSize != 0) - return UnableToLegalize; - - NumParts = Size / NarrowSize; - - // Need to break down the condition type - if (CondTy.isVector()) { - if (CondTy.getNumElements() == NumParts) - NarrowTy1 = CondTy.getElementType(); - else - NarrowTy1 = - LLT::vector(CondTy.getElementCount().divideCoefficientBy(NumParts), - CondTy.getScalarSizeInBits()); - } + // Merge small outputs into MI's def. + if (NumLeftovers) { + mergeMixedSubvectors(MI.getReg(0), OutputRegs); } else { - NumParts = CondTy.getNumElements(); - if (NarrowTy.isVector()) { - // TODO: Handle uneven breakdown. - if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements()) - return UnableToLegalize; - - return UnableToLegalize; - } else { - NarrowTy0 = DstTy.getElementType(); - NarrowTy1 = NarrowTy; - } - } - - SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs; - if (CondTy.isVector()) - extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs); - - extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs); - extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs); - - for (unsigned i = 0; i < NumParts; ++i) { - Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); - MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? 
Src0Regs[i] : CondReg, - Src1Regs[i], Src2Regs[i]); - DstRegs.push_back(DstReg); - } - - if (NarrowTy0.isVector()) - MIRBuilder.buildConcatVectors(DstReg, DstRegs); - else - MIRBuilder.buildBuildVector(DstReg, DstRegs); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - const Register DstReg = MI.getOperand(0).getReg(); - LLT PhiTy = MRI.getType(DstReg); - LLT LeftoverTy; - - // All of the operands need to have the same number of elements, so if we can - // determine a type breakdown for the result type, we can for all of the - // source types. - int NumParts, NumLeftover; - std::tie(NumParts, NumLeftover) - = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy); - if (NumParts < 0) - return UnableToLegalize; - - SmallVector<Register, 4> DstRegs, LeftoverDstRegs; - SmallVector<MachineInstrBuilder, 4> NewInsts; - - const int TotalNumParts = NumParts + NumLeftover; - - // Insert the new phis in the result block first. - for (int I = 0; I != TotalNumParts; ++I) { - LLT Ty = I < NumParts ? NarrowTy : LeftoverTy; - Register PartDstReg = MRI.createGenericVirtualRegister(Ty); - NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI) - .addDef(PartDstReg)); - if (I < NumParts) - DstRegs.push_back(PartDstReg); - else - LeftoverDstRegs.push_back(PartDstReg); - } - - MachineBasicBlock *MBB = MI.getParent(); - MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI()); - insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs); - - SmallVector<Register, 4> PartRegs, LeftoverRegs; - - // Insert code to extract the incoming values in each predecessor block. - for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { - PartRegs.clear(); - LeftoverRegs.clear(); - - Register SrcReg = MI.getOperand(I).getReg(); - MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); - MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); - - LLT Unused; - if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs, - LeftoverRegs)) - return UnableToLegalize; - - // Add the newly created operand splits to the existing instructions. The - // odd-sized pieces are ordered after the requested NarrowTyArg sized - // pieces. - for (int J = 0; J != TotalNumParts; ++J) { - MachineInstrBuilder MIB = NewInsts[J]; - MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]); - MIB.addMBB(&OpMBB); - } + MIRBuilder.buildMerge(MI.getReg(0), OutputRegs); } MI.eraseFromParent(); @@ -3927,27 +3842,36 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { - if (TypeIdx != 1) - return UnableToLegalize; - const int NumDst = MI.getNumOperands() - 1; const Register SrcReg = MI.getOperand(NumDst).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + LLT SrcTy = MRI.getType(SrcReg); - // TODO: Create sequence of extracts. - if (DstTy == NarrowTy) + if (TypeIdx != 1 || NarrowTy == DstTy) return UnableToLegalize; - LLT GCDTy = getGCDType(SrcTy, NarrowTy); - if (DstTy == GCDTy) { - // This would just be a copy of the same unmerge. - // TODO: Create extracts, pad with undef and create intermediate merges. + // Requires compatible types. Otherwise SrcReg should have been defined by + // merge-like instruction that would get artifact combined. 
Most likely + // instruction that defines SrcReg has to perform more/fewer elements + // legalization compatible with NarrowTy. + assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types"); + assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type"); + + if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) || + (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0)) return UnableToLegalize; - } - auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); + // This is most likely DstTy (smaller then register size) packed in SrcTy + // (larger then register size) and since unmerge was not combined it will be + // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy + // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy. + + // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy) + // + // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence + // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg + // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy) + auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg); const int NumUnmerge = Unmerge->getNumOperands() - 1; const int PartsPerUnmerge = NumDst / NumUnmerge; @@ -3964,89 +3888,87 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, } LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - Register Result = MI.getOperand(0).getReg(); - Register Overflow = MI.getOperand(1).getReg(); - Register LHS = MI.getOperand(2).getReg(); - Register RHS = MI.getOperand(3).getReg(); - - LLT SrcTy = MRI.getType(LHS); - if (!SrcTy.isVector()) - return UnableToLegalize; - - LLT ElementType = SrcTy.getElementType(); - LLT OverflowElementTy = MRI.getType(Overflow).getElementType(); - const ElementCount NumResult = SrcTy.getElementCount(); - LLT GCDTy = getGCDType(SrcTy, NarrowTy); - - // Unmerge the operands to smaller parts of GCD type. - auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS); - auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS); - - const int NumOps = UnmergeLHS->getNumOperands() - 1; - const ElementCount PartsPerUnmerge = NumResult.divideCoefficientBy(NumOps); - LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy); - LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType); - - // Perform the operation over unmerged parts. - SmallVector<Register, 8> ResultParts; - SmallVector<Register, 8> OverflowParts; - for (int I = 0; I != NumOps; ++I) { - Register Operand1 = UnmergeLHS->getOperand(I).getReg(); - Register Operand2 = UnmergeRHS->getOperand(I).getReg(); - auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy}, - {Operand1, Operand2}); - ResultParts.push_back(PartMul->getOperand(0).getReg()); - OverflowParts.push_back(PartMul->getOperand(1).getReg()); - } - - LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts); - LLT OverflowLCMTy = - LLT::scalarOrVector(ResultLCMTy.getElementCount(), OverflowElementTy); - - // Recombine the pieces to the original result and overflow registers. - buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts); - buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts); - MI.eraseFromParent(); - return Legalized; -} - -// Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces -// a vector -// -// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with -// undef as necessary. 
-// -// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 -// -> <2 x s16> -// -// %4:_(s16) = G_IMPLICIT_DEF -// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 -// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 -// %7:_(<2 x s16>) = G_IMPLICIT_DEF -// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7 -// %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8 -LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); - LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); + // Requires compatible types. Otherwise user of DstReg did not perform unmerge + // that should have been artifact combined. Most likely instruction that uses + // DstReg has to do more/fewer elements legalization compatible with NarrowTy. + assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types"); + assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type"); + if (NarrowTy == SrcTy) + return UnableToLegalize; - // Break into a common type - SmallVector<Register, 16> Parts; - for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) - extractGCDType(Parts, GCDTy, MO.getReg()); + // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use + // is for old mir tests. Since the changes to more/fewer elements it should no + // longer be possible to generate MIR like this when starting from llvm-ir + // because LCMTy approach was replaced with merge/unmerge to vector elements. + if (TypeIdx == 1) { + assert(SrcTy.isVector() && "Expected vector types"); + assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type"); + if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) || + (NarrowTy.getNumElements() >= SrcTy.getNumElements())) + return UnableToLegalize; + // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy) + // + // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy) + // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy) + // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4 + // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6 + // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8 + // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11 + + SmallVector<Register, 8> Elts; + LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType(); + for (unsigned i = 1; i < MI.getNumOperands(); ++i) { + auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg()); + for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j) + Elts.push_back(Unmerge.getReg(j)); + } - // Build the requested new merge, padding with undef. - LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, - TargetOpcode::G_ANYEXT); + SmallVector<Register, 8> NarrowTyElts; + unsigned NumNarrowTyElts = NarrowTy.getNumElements(); + unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts; + for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces; + ++i, Offset += NumNarrowTyElts) { + ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts); + NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0)); + } - // Pack into the original result register. 
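As an aside on the rewritten fewerElementsVectorMultiEltType driver above: it splits every vector input into NumElts-sized pieces, emits one narrow instruction per piece, and rebuilds the original destination. A rough MIR sketch, assuming an even split of a <4 x s32> G_ADD with NumElts = 2 (register numbers are illustrative, and the exact instructions produced by extractVectorParts may differ):

//   %2:_(<4 x s32>) = G_ADD %0:_(<4 x s32>), %1:_(<4 x s32>)
// becomes, roughly:
//   %3:_(<2 x s32>), %4:_(<2 x s32>) = G_UNMERGE_VALUES %0:_(<4 x s32>)
//   %5:_(<2 x s32>), %6:_(<2 x s32>) = G_UNMERGE_VALUES %1:_(<4 x s32>)
//   %7:_(<2 x s32>) = G_ADD %3, %5
//   %8:_(<2 x s32>) = G_ADD %4, %6
//   %2:_(<4 x s32>) = G_CONCAT_VECTORS %7:_(<2 x s32>), %8:_(<2 x s32>)

With a leftover (for example <5 x s32> with NumElts = 2), the last piece is smaller and the results are recombined through mergeMixedSubvectors instead of a plain buildMerge.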
- buildWidenedRemergeToDst(DstReg, LCMTy, Parts); + MIRBuilder.buildMerge(DstReg, NarrowTyElts); + MI.eraseFromParent(); + return Legalized; + } + + assert(TypeIdx == 0 && "Bad type index"); + if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) || + (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0)) + return UnableToLegalize; + // This is most likely SrcTy (smaller then register size) packed in DstTy + // (larger then register size) and since merge was not combined it will be + // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy + // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy. + + // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4 + // + // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg + // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4 + // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence + SmallVector<Register, 8> NarrowTyElts; + unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements(); + unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1; + unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts; + for (unsigned i = 0; i < NumParts; ++i) { + SmallVector<Register, 8> Sources; + for (unsigned j = 0; j < NumElts; ++j) + Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg()); + NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Sources).getReg(0)); + } + + MIRBuilder.buildMerge(DstReg, NarrowTyElts); MI.eraseFromParent(); return Legalized; } @@ -4218,163 +4140,14 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult -LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx, - LLT NarrowTy) { - assert(TypeIdx == 0 && "only one type index expected"); - - const unsigned Opc = MI.getOpcode(); - const int NumDefOps = MI.getNumExplicitDefs(); - const int NumSrcOps = MI.getNumOperands() - NumDefOps; - const unsigned Flags = MI.getFlags(); - const unsigned NarrowSize = NarrowTy.getSizeInBits(); - const LLT NarrowScalarTy = LLT::scalar(NarrowSize); - - assert(MI.getNumOperands() <= 4 && "expected instruction with either 1 " - "result and 1-3 sources or 2 results and " - "1-2 sources"); - - SmallVector<Register, 2> DstRegs; - for (int I = 0; I < NumDefOps; ++I) - DstRegs.push_back(MI.getOperand(I).getReg()); - - // First of all check whether we are narrowing (changing the element type) - // or reducing the vector elements - const LLT DstTy = MRI.getType(DstRegs[0]); - const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType(); - - SmallVector<Register, 8> ExtractedRegs[3]; - SmallVector<Register, 8> Parts; - - // Break down all the sources into NarrowTy pieces we can operate on. This may - // involve creating merges to a wider type, padded with undef. - for (int I = 0; I != NumSrcOps; ++I) { - Register SrcReg = MI.getOperand(I + NumDefOps).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - - // The type to narrow SrcReg to. For narrowing, this is a smaller scalar. - // For fewerElements, this is a smaller vector with the same element type. - LLT OpNarrowTy; - if (IsNarrow) { - OpNarrowTy = NarrowScalarTy; - - // In case of narrowing, we need to cast vectors to scalars for this to - // work properly - // FIXME: Can we do without the bitcast here if we're narrowing? - if (SrcTy.isVector()) { - SrcTy = LLT::scalar(SrcTy.getSizeInBits()); - SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0); - } - } else { - auto NarrowEC = NarrowTy.isVector() ? 
NarrowTy.getElementCount() - : ElementCount::getFixed(1); - OpNarrowTy = LLT::scalarOrVector(NarrowEC, SrcTy.getScalarType()); - } - - LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg); - - // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand. - buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I], - TargetOpcode::G_ANYEXT); - } - - SmallVector<Register, 8> ResultRegs[2]; - - // Input operands for each sub-instruction. - SmallVector<SrcOp, 4> InputRegs(NumSrcOps, Register()); - - int NumParts = ExtractedRegs[0].size(); - const unsigned DstSize = DstTy.getSizeInBits(); - const LLT DstScalarTy = LLT::scalar(DstSize); - - // Narrowing needs to use scalar types - LLT DstLCMTy, NarrowDstTy; - if (IsNarrow) { - DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy); - NarrowDstTy = NarrowScalarTy; - } else { - DstLCMTy = getLCMType(DstTy, NarrowTy); - NarrowDstTy = NarrowTy; - } - - // We widened the source registers to satisfy merge/unmerge size - // constraints. We'll have some extra fully undef parts. - const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize; - - for (int I = 0; I != NumRealParts; ++I) { - // Emit this instruction on each of the split pieces. - for (int J = 0; J != NumSrcOps; ++J) - InputRegs[J] = ExtractedRegs[J][I]; - - MachineInstrBuilder Inst; - if (NumDefOps == 1) - Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags); - else - Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy, NarrowDstTy}, InputRegs, - Flags); - - for (int J = 0; J != NumDefOps; ++J) - ResultRegs[J].push_back(Inst.getReg(J)); - } - - // Fill out the widened result with undef instead of creating instructions - // with undef inputs. - int NumUndefParts = NumParts - NumRealParts; - if (NumUndefParts != 0) { - Register Undef = MIRBuilder.buildUndef(NarrowDstTy).getReg(0); - for (int I = 0; I != NumDefOps; ++I) - ResultRegs[I].append(NumUndefParts, Undef); - } - - // Extract the possibly padded result. Use a scratch register if we need to do - // a final bitcast, otherwise use the original result register. - Register MergeDstReg; - for (int I = 0; I != NumDefOps; ++I) { - if (IsNarrow && DstTy.isVector()) - MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy); - else - MergeDstReg = DstRegs[I]; - - buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs[I]); - - // Recast to vector if we narrowed a vector - if (IsNarrow && DstTy.isVector()) - MIRBuilder.buildBitcast(DstRegs[I], MergeDstReg); - } - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - int64_t Imm = MI.getOperand(2).getImm(); - - LLT DstTy = MRI.getType(DstReg); - - SmallVector<Register, 8> Parts; - LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg); - LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts); - - for (Register &R : Parts) - R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0); - - buildWidenedRemergeToDst(DstReg, LCMTy, Parts); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { using namespace TargetOpcode; + GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI); + unsigned NumElts = NarrowTy.isVector() ? 
NarrowTy.getNumElements() : 1; switch (MI.getOpcode()) { case G_IMPLICIT_DEF: - return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy); case G_TRUNC: case G_AND: case G_OR: @@ -4439,10 +4212,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_SSUBSAT: case G_UADDSAT: case G_USUBSAT: - return reduceOperationWidth(MI, TypeIdx, NarrowTy); case G_UMULO: case G_SMULO: - return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy); case G_SHL: case G_LSHR: case G_ASHR: @@ -4454,7 +4225,6 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_CTTZ_ZERO_UNDEF: case G_CTPOP: case G_FCOPYSIGN: - return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy); case G_ZEXT: case G_SEXT: case G_ANYEXT: @@ -4467,14 +4237,16 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_INTTOPTR: case G_PTRTOINT: case G_ADDRSPACE_CAST: - return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy); + return fewerElementsVectorMultiEltType(GMI, NumElts); case G_ICMP: case G_FCMP: - return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy); + return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cpm predicate*/}); case G_SELECT: - return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy); + if (MRI.getType(MI.getOperand(1).getReg()).isVector()) + return fewerElementsVectorMultiEltType(GMI, NumElts); + return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/}); case G_PHI: - return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy); + return fewerElementsVectorPhi(GMI, NumElts); case G_UNMERGE_VALUES: return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy); case G_BUILD_VECTOR: @@ -4491,7 +4263,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_STORE: return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy); case G_SEXT_INREG: - return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy); + return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/}); GISEL_VECREDUCE_CASES_NONSEQ return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy); case G_SHUFFLE_VECTOR: @@ -5053,6 +4825,15 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_AND: case TargetOpcode::G_OR: case TargetOpcode::G_XOR: + case TargetOpcode::G_ADD: + case TargetOpcode::G_SUB: + case TargetOpcode::G_MUL: + case TargetOpcode::G_FADD: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_UADDSAT: + case TargetOpcode::G_USUBSAT: + case TargetOpcode::G_SADDSAT: + case TargetOpcode::G_SSUBSAT: case TargetOpcode::G_SMIN: case TargetOpcode::G_SMAX: case TargetOpcode::G_UMIN: @@ -5070,6 +4851,17 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_FMA: + case TargetOpcode::G_FSHR: + case TargetOpcode::G_FSHL: { + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 1); + moreElementsVectorSrc(MI, MoreTy, 2); + moreElementsVectorSrc(MI, MoreTy, 3); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; + } case TargetOpcode::G_EXTRACT: if (TypeIdx != 1) return UnableToLegalize; @@ -5079,6 +4871,11 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, return Legalized; case TargetOpcode::G_INSERT: case TargetOpcode::G_FREEZE: + case TargetOpcode::G_FNEG: + case TargetOpcode::G_FABS: + case TargetOpcode::G_BSWAP: + case TargetOpcode::G_FCANONICALIZE: + case TargetOpcode::G_SEXT_INREG: if (TypeIdx != 0) return 
UnableToLegalize; Observer.changingInstr(MI); @@ -5098,30 +4895,34 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_UNMERGE_VALUES: { - if (TypeIdx != 1) - return UnableToLegalize; - - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - int NumDst = MI.getNumOperands() - 1; - moreElementsVectorSrc(MI, MoreTy, NumDst); - - auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); - for (int I = 0; I != NumDst; ++I) - MIB.addDef(MI.getOperand(I).getReg()); + case TargetOpcode::G_UNMERGE_VALUES: + return UnableToLegalize; + case TargetOpcode::G_PHI: + return moreElementsVectorPhi(MI, TypeIdx, MoreTy); + case TargetOpcode::G_SHUFFLE_VECTOR: + return moreElementsVectorShuffle(MI, TypeIdx, MoreTy); + case TargetOpcode::G_BUILD_VECTOR: { + SmallVector<SrcOp, 8> Elts; + for (auto Op : MI.uses()) { + Elts.push_back(Op.getReg()); + } - int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits(); - for (int I = NumDst; I != NewNumDst; ++I) - MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); + for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) { + Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType())); + } - MIB.addUse(MI.getOperand(NumDst).getReg()); + MIRBuilder.buildDeleteTrailingVectorElements( + MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts)); MI.eraseFromParent(); return Legalized; } - case TargetOpcode::G_PHI: - return moreElementsVectorPhi(MI, TypeIdx, MoreTy); - case TargetOpcode::G_SHUFFLE_VECTOR: - return moreElementsVectorShuffle(MI, TypeIdx, MoreTy); + case TargetOpcode::G_TRUNC: { + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 1); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; + } default: return UnableToLegalize; } @@ -6778,6 +6579,24 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) { LLT VecTy = MRI.getType(SrcVec); LLT EltTy = VecTy.getElementType(); + unsigned NumElts = VecTy.getNumElements(); + + int64_t IdxVal; + if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) { + SmallVector<Register, 8> SrcRegs; + extractParts(SrcVec, EltTy, NumElts, SrcRegs); + + if (InsertVal) { + SrcRegs[IdxVal] = MI.getOperand(2).getReg(); + MIRBuilder.buildMerge(DstReg, SrcRegs); + } else { + MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]); + } + + MI.eraseFromParent(); + return Legalized; + } + if (!EltTy.isByteSized()) { // Not implemented. LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n"); return UnableToLegalize; @@ -6796,7 +6615,6 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) { // if the index is out of bounds. Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx); - int64_t IdxVal; if (mi_match(Idx, MRI, m_ICst(IdxVal))) { int64_t Offset = IdxVal * EltBytes; PtrInfo = PtrInfo.getWithOffset(Offset); @@ -6923,6 +6741,32 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) { LLT DstTy = MRI.getType(Dst); LLT SrcTy = MRI.getType(Src); + // Extract sub-vector or one element + if (SrcTy.isVector()) { + unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits(); + unsigned DstSize = DstTy.getSizeInBits(); + + if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) && + (Offset + DstSize <= SrcTy.getSizeInBits())) { + // Unmerge and allow access to each Src element for the artifact combiner. 
+ auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), Src); + + // Take element(s) we need to extract and copy it (merge them). + SmallVector<Register, 8> SubVectorElts; + for (unsigned Idx = Offset / SrcEltSize; + Idx < (Offset + DstSize) / SrcEltSize; ++Idx) { + SubVectorElts.push_back(Unmerge.getReg(Idx)); + } + if (SubVectorElts.size() == 1) + MIRBuilder.buildCopy(Dst, SubVectorElts[0]); + else + MIRBuilder.buildMerge(Dst, SubVectorElts); + + MI.eraseFromParent(); + return Legalized; + } + } + if (DstTy.isScalar() && (SrcTy.isScalar() || (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) { @@ -6956,6 +6800,45 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) { LLT DstTy = MRI.getType(Src); LLT InsertTy = MRI.getType(InsertSrc); + // Insert sub-vector or one element + if (DstTy.isVector() && !InsertTy.isPointer()) { + LLT EltTy = DstTy.getElementType(); + unsigned EltSize = EltTy.getSizeInBits(); + unsigned InsertSize = InsertTy.getSizeInBits(); + + if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) && + (Offset + InsertSize <= DstTy.getSizeInBits())) { + auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src); + SmallVector<Register, 8> DstElts; + unsigned Idx = 0; + // Elements from Src before insert start Offset + for (; Idx < Offset / EltSize; ++Idx) { + DstElts.push_back(UnmergeSrc.getReg(Idx)); + } + + // Replace elements in Src with elements from InsertSrc + if (InsertTy.getSizeInBits() > EltSize) { + auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc); + for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize; + ++Idx, ++i) { + DstElts.push_back(UnmergeInsertSrc.getReg(i)); + } + } else { + DstElts.push_back(InsertSrc); + ++Idx; + } + + // Remaining elements from Src after insert + for (; Idx < DstTy.getNumElements(); ++Idx) { + DstElts.push_back(UnmergeSrc.getReg(Idx)); + } + + MIRBuilder.buildMerge(Dst, DstElts); + MI.eraseFromParent(); + return Legalized; + } + } + if (InsertTy.isVector() || (DstTy.isVector() && DstTy.getElementType() != InsertTy)) return UnableToLegalize; diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp index 03dda806cb1e..de8dbd456901 100644 --- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -554,12 +554,11 @@ bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) { bool Changed = false; // Walk through the block bottom-up, looking for merging candidates. StoreMergeCandidate Candidate; - for (auto II = MBB.rbegin(), IE = MBB.rend(); II != IE; ++II) { - MachineInstr &MI = *II; + for (MachineInstr &MI : llvm::reverse(MBB)) { if (InstsToErase.contains(&MI)) continue; - if (auto StoreMI = dyn_cast<GStore>(&*II)) { + if (auto *StoreMI = dyn_cast<GStore>(&MI)) { // We have a G_STORE. Add it to the candidate if it writes to an adjacent // address. 
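A hedged illustration of the new sub-vector path in lowerExtract above (register numbers are made up; the merge of scalar elements comes out as a G_BUILD_VECTOR, the buildMerge behaviour this patch relies on elsewhere). Extracting the high <2 x s32> half of a <4 x s32> at bit offset 64:

//   %1:_(<2 x s32>) = G_EXTRACT %0:_(<4 x s32>), 64
// is lowered, roughly, to:
//   %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %0:_(<4 x s32>)
//   %1:_(<2 x s32>) = G_BUILD_VECTOR %4:_(s32), %5:_(s32)

The single-element case (DstSize == SrcEltSize) takes the buildCopy branch instead, and the analogous sub-vector path added to lowerInsert above stitches the inserted elements in between the unmerged ones.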
if (!addStoreToCandidate(*StoreMI, Candidate)) { diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index fb5ed35c1f72..391251886fbb 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -215,6 +215,48 @@ MachineInstrBuilder MachineIRBuilder::buildMaskLowPtrBits(const DstOp &Res, return buildPtrMask(Res, Op0, MaskReg); } +MachineInstrBuilder +MachineIRBuilder::buildPadVectorWithUndefElements(const DstOp &Res, + const SrcOp &Op0) { + LLT ResTy = Res.getLLTTy(*getMRI()); + LLT Op0Ty = Op0.getLLTTy(*getMRI()); + + assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type"); + assert((ResTy.getElementType() == Op0Ty.getElementType()) && + "Different vector element types"); + assert((ResTy.getNumElements() > Op0Ty.getNumElements()) && + "Op0 has more elements"); + + auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0); + SmallVector<Register, 8> Regs; + for (auto Op : Unmerge.getInstr()->defs()) + Regs.push_back(Op.getReg()); + Register Undef = buildUndef(Op0Ty.getElementType()).getReg(0); + unsigned NumberOfPadElts = ResTy.getNumElements() - Regs.size(); + for (unsigned i = 0; i < NumberOfPadElts; ++i) + Regs.push_back(Undef); + return buildMerge(Res, Regs); +} + +MachineInstrBuilder +MachineIRBuilder::buildDeleteTrailingVectorElements(const DstOp &Res, + const SrcOp &Op0) { + LLT ResTy = Res.getLLTTy(*getMRI()); + LLT Op0Ty = Op0.getLLTTy(*getMRI()); + + assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type"); + assert((ResTy.getElementType() == Op0Ty.getElementType()) && + "Different vector element types"); + assert((ResTy.getNumElements() < Op0Ty.getNumElements()) && + "Op0 has fewer elements"); + + SmallVector<Register, 8> Regs; + auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0); + for (unsigned i = 0; i < ResTy.getNumElements(); ++i) + Regs.push_back(Unmerge.getReg(i)); + return buildMerge(Res, Regs); +} + MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { return buildInstr(TargetOpcode::G_BR).addMBB(&Dest); } @@ -613,10 +655,8 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res, MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res, const SrcOp &Op) { unsigned NumReg = Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits(); - SmallVector<Register, 8> TmpVec; - for (unsigned I = 0; I != NumReg; ++I) - TmpVec.push_back(getMRI()->createGenericVirtualRegister(Res)); - return buildUnmerge(TmpVec, Op); + SmallVector<DstOp, 8> TmpVec(NumReg, Res); + return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op); } MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<Register> Res, diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index b0b84763e922..4981a537dc7c 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -923,6 +923,21 @@ LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) { return LLT::scalar(LCMSize); } +LLT llvm::getCoverTy(LLT OrigTy, LLT TargetTy) { + if (!OrigTy.isVector() || !TargetTy.isVector() || OrigTy == TargetTy || + (OrigTy.getScalarSizeInBits() != TargetTy.getScalarSizeInBits())) + return getLCMType(OrigTy, TargetTy); + + unsigned OrigTyNumElts = OrigTy.getNumElements(); + unsigned TargetTyNumElts = TargetTy.getNumElements(); + if (OrigTyNumElts % TargetTyNumElts == 0) + return OrigTy; + + unsigned NumElts = alignTo(OrigTyNumElts, TargetTyNumElts); + return 
LLT::scalarOrVector(ElementCount::getFixed(NumElts), + OrigTy.getElementType()); +} + LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { const unsigned OrigSize = OrigTy.getSizeInBits(); const unsigned TargetSize = TargetTy.getSizeInBits(); @@ -1184,25 +1199,6 @@ bool llvm::shouldOptForSize(const MachineBasicBlock &MBB, llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI); } -/// These artifacts generally don't have any debug users because they don't -/// directly originate from IR instructions, but instead usually from -/// legalization. Avoiding checking for debug users improves compile time. -/// Note that truncates or extends aren't included because they have IR -/// counterparts which can have debug users after translation. -static bool shouldSkipDbgValueFor(MachineInstr &MI) { - switch (MI.getOpcode()) { - case TargetOpcode::G_UNMERGE_VALUES: - case TargetOpcode::G_MERGE_VALUES: - case TargetOpcode::G_CONCAT_VECTORS: - case TargetOpcode::G_BUILD_VECTOR: - case TargetOpcode::G_EXTRACT: - case TargetOpcode::G_INSERT: - return true; - default: - return false; - } -} - void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI, LostDebugLocObserver *LocObserver, SmallInstListTy &DeadInstChain) { @@ -1212,10 +1208,7 @@ void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI, } LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n"); DeadInstChain.remove(&MI); - if (shouldSkipDbgValueFor(MI)) - MI.eraseFromParent(); - else - MI.eraseFromParentAndMarkDBGValuesForRemoval(); + MI.eraseFromParent(); if (LocObserver) LocObserver->checkpoint(false); } diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp index 0882ce366c9c..fc97938ccd3e 100644 --- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -242,7 +242,7 @@ bool ImplicitNullChecks::canHandle(const MachineInstr *MI) { auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); }; (void)IsRegMask; - assert(!llvm::any_of(MI->operands(), IsRegMask) && + assert(llvm::none_of(MI->operands(), IsRegMask) && "Calls were filtered out above!"); auto IsUnordered = [](MachineMemOperand *MMO) { return MMO->isUnordered(); }; diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index fc5ac45752ca..c975013db8c8 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -686,9 +686,7 @@ void InlineSpiller::reMaterializeAll() { // Remove any values that were completely rematted. 
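A quick worked example for the getCoverTy helper added to Utils.cpp above. When both types are vectors with the same element type and the element counts do not divide evenly, the original count is rounded up to a multiple of the target's; otherwise it returns OrigTy unchanged or falls back to getLCMType. A minimal sketch (the LLT::fixed_vector calls are only for illustration and are not part of the patch):

  LLT V2S32 = LLT::fixed_vector(2, 32);
  LLT Cover = getCoverTy(LLT::fixed_vector(3, 32), V2S32); // <4 x s32>: alignTo(3, 2) == 4
  LLT Same  = getCoverTy(LLT::fixed_vector(4, 32), V2S32); // <4 x s32>: 4 is already a multiple of 2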
for (Register Reg : RegsToSpill) { LiveInterval &LI = LIS.getInterval(Reg); - for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end(); - I != E; ++I) { - VNInfo *VNI = *I; + for (VNInfo *VNI : llvm::make_range(LI.vni_begin(), LI.vni_end())) { if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI)) continue; MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); diff --git a/llvm/lib/CodeGen/InterferenceCache.cpp b/llvm/lib/CodeGen/InterferenceCache.cpp index a56485cdbc67..3cab9e5734ee 100644 --- a/llvm/lib/CodeGen/InterferenceCache.cpp +++ b/llvm/lib/CodeGen/InterferenceCache.cpp @@ -56,8 +56,8 @@ void InterferenceCache::init(MachineFunction *mf, LIUArray = liuarray; TRI = tri; reinitPhysRegEntries(); - for (unsigned i = 0; i != CacheEntries; ++i) - Entries[i].clear(mf, indexes, lis); + for (Entry &E : Entries) + E.clear(mf, indexes, lis); } InterferenceCache::Entry *InterferenceCache::get(MCRegister PhysReg) { diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index cf62b0e5d7e8..e97dcca201e8 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -1249,8 +1249,8 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) { std::array<unsigned, 4> CandidateSizes = {64, 32, 16, 8}; Optional<ValueIDNum> Result = None; Optional<LocIdx> SpillLoc = None; - for (unsigned int I = 0; I < CandidateSizes.size(); ++I) { - unsigned SpillID = MTracker->getLocID(SpillNo, {CandidateSizes[I], 0}); + for (unsigned CS : CandidateSizes) { + unsigned SpillID = MTracker->getLocID(SpillNo, {CS, 0}); SpillLoc = MTracker->getSpillMLoc(SpillID); ValueIDNum Val = MTracker->readMLoc(*SpillLoc); // If this value was defined in it's own position, then it was probably diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp index a632d3d9ce76..b4dd41bbb810 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -492,10 +492,10 @@ private: static VarLoc CreateCopyLoc(const VarLoc &OldVL, const MachineLoc &OldML, Register NewReg) { VarLoc VL = OldVL; - for (size_t I = 0, E = VL.Locs.size(); I < E; ++I) - if (VL.Locs[I] == OldML) { - VL.Locs[I].Kind = MachineLocKind::RegisterKind; - VL.Locs[I].Value.RegNo = NewReg; + for (MachineLoc &ML : VL.Locs) + if (ML == OldML) { + ML.Kind = MachineLocKind::RegisterKind; + ML.Value.RegNo = NewReg; return VL; } llvm_unreachable("Should have found OldML in new VarLoc."); @@ -506,10 +506,10 @@ private: static VarLoc CreateSpillLoc(const VarLoc &OldVL, const MachineLoc &OldML, unsigned SpillBase, StackOffset SpillOffset) { VarLoc VL = OldVL; - for (int I = 0, E = VL.Locs.size(); I < E; ++I) - if (VL.Locs[I] == OldML) { - VL.Locs[I].Kind = MachineLocKind::SpillLocKind; - VL.Locs[I].Value.SpillLocation = {SpillBase, SpillOffset}; + for (MachineLoc &ML : VL.Locs) + if (ML == OldML) { + ML.Kind = MachineLocKind::SpillLocKind; + ML.Value.SpillLocation = {SpillBase, SpillOffset}; return VL; } llvm_unreachable("Should have found OldML in new VarLoc."); diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index 5f976bf43c5b..e6661e5135c3 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -822,9 +822,6 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { // register that hasn't been 
defined yet. If we do not remove those here, then // the re-insertion of the DBG_VALUE instruction after register allocation // will be incorrect. - // TODO: If earlier passes are corrected to generate sane debug information - // (and if the machine verifier is improved to catch this), then these checks - // could be removed or replaced by asserts. bool Discard = false; for (const MachineOperand &Op : MI.debug_operands()) { if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) { @@ -1341,8 +1338,8 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<Register> NewRegs, bool DidChange = false; LocMap::iterator LocMapI; LocMapI.setMap(locInts); - for (unsigned i = 0; i != NewRegs.size(); ++i) { - LiveInterval *LI = &LIS.getInterval(NewRegs[i]); + for (Register NewReg : NewRegs) { + LiveInterval *LI = &LIS.getInterval(NewReg); if (LI->empty()) continue; @@ -1500,8 +1497,8 @@ void LDVImpl::splitRegister(Register OldReg, ArrayRef<Register> NewRegs) { // Map all of the new virtual registers. UserValue *UV = lookupVirtReg(OldReg); - for (unsigned i = 0; i != NewRegs.size(); ++i) - mapVirtReg(NewRegs[i], UV); + for (Register NewReg : NewRegs) + mapVirtReg(NewReg, UV); } void LiveDebugVariables:: diff --git a/llvm/lib/CodeGen/LiveDebugVariables.h b/llvm/lib/CodeGen/LiveDebugVariables.h index 07dd3a83866f..9998ce9e8dad 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.h +++ b/llvm/lib/CodeGen/LiveDebugVariables.h @@ -56,6 +56,11 @@ private: bool runOnMachineFunction(MachineFunction &) override; void releaseMemory() override; void getAnalysisUsage(AnalysisUsage &) const override; + + MachineFunctionProperties getSetProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::TracksDebugUserValues); + } }; } // end namespace llvm diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index 6380c4bfd6e6..05768140cbdf 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -133,6 +133,22 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, if (OVNI != li.getVNInfoAt(UseIdx)) return false; + + // Check that subrange is live at UseIdx. + if (MO.getSubReg()) { + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + LaneBitmask LM = TRI->getSubRegIndexLaneMask(MO.getSubReg()); + for (LiveInterval::SubRange &SR : li.subranges()) { + if ((SR.LaneMask & LM).none()) + continue; + if (!SR.liveAt(UseIdx)) + return false; + // Early exit if all used lanes are checked. No need to continue. + LM &= ~SR.LaneMask; + if (LM.none()) + break; + } + } } return true; } diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp index e8744797707b..94bdfab5e5e0 100644 --- a/llvm/lib/CodeGen/LiveVariables.cpp +++ b/llvm/lib/CodeGen/LiveVariables.cpp @@ -141,8 +141,8 @@ void LiveVariables::HandleVirtRegUse(Register Reg, MachineBasicBlock *MBB, } #ifndef NDEBUG - for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i) - assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!"); + for (MachineInstr *Kill : VRInfo.Kills) + assert(Kill->getParent() != MBB && "entry should be at end!"); #endif // This situation can occur: @@ -534,8 +534,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI, MachineBasicBlock *MBB = MI.getParent(); // Process all uses. 
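A side note on the LiveDebugVariables change above: getSetProperties is the MachineFunctionPass hook that declares which MachineFunctionProperties the pass establishes, so the new TracksDebugUserValues flag becomes visible to everything that runs afterwards. A minimal consumer sketch, assuming some later machine pass (illustration only, not from the patch):

  // Inside a later pass's runOnMachineFunction:
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::TracksDebugUserValues)) {
    // Debug user values are being tracked from this point in the pipeline.
  }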
- for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) { - unsigned MOReg = UseRegs[i]; + for (unsigned MOReg : UseRegs) { if (Register::isVirtualRegister(MOReg)) HandleVirtRegUse(MOReg, MBB, MI); else if (!MRI->isReserved(MOReg)) @@ -543,12 +542,11 @@ void LiveVariables::runOnInstr(MachineInstr &MI, } // Process all masked registers. (Call clobbers). - for (unsigned i = 0, e = RegMasks.size(); i != e; ++i) - HandleRegMask(MI.getOperand(RegMasks[i])); + for (unsigned Mask : RegMasks) + HandleRegMask(MI.getOperand(Mask)); // Process all defs. - for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) { - unsigned MOReg = DefRegs[i]; + for (unsigned MOReg : DefRegs) { if (Register::isVirtualRegister(MOReg)) HandleVirtRegDef(MOReg, MI); else if (!MRI->isReserved(MOReg)) diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index ee2387d1e8e6..37fd3e4853ac 100644 --- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -210,7 +210,11 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { StackObjSet SmallArrayObjs; StackObjSet AddrOfObjs; - AdjustStackOffset(MFI, StackProtectorFI, Offset, StackGrowsDown, MaxAlign); + // Only place the stack protector in the local stack area if the target + // allows it. + if (TFI.isStackIdSafeForLocalArea(MFI.getStackID(StackProtectorFI))) + AdjustStackOffset(MFI, StackProtectorFI, Offset, StackGrowsDown, + MaxAlign); // Assign large stack objects first. for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 6221b5929301..d0323eaf3d78 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -350,18 +350,33 @@ void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) { bool HasPHI = false; bool HasInlineAsm = false; + bool AllTiedOpsRewritten = true, HasTiedOps = false; for (const MachineBasicBlock &MBB : MF) { for (const MachineInstr &MI : MBB) { if (MI.isPHI()) HasPHI = true; if (MI.isInlineAsm()) HasInlineAsm = true; + for (unsigned I = 0; I < MI.getNumOperands(); ++I) { + const MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.getReg()) + continue; + unsigned DefIdx; + if (MO.isUse() && MI.isRegTiedToDefOperand(I, &DefIdx)) { + HasTiedOps = true; + if (MO.getReg() != MI.getOperand(DefIdx).getReg()) + AllTiedOpsRewritten = false; + } + } } } if (!HasPHI) Properties.set(MachineFunctionProperties::Property::NoPHIs); MF.setHasInlineAsm(HasInlineAsm); + if (HasTiedOps && AllTiedOpsRewritten) + Properties.set(MachineFunctionProperties::Property::TiedOpsRewritten); + if (isSSA(MF)) Properties.set(MachineFunctionProperties::Property::IsSSA); else @@ -457,6 +472,9 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, if (YamlMF.FailsVerification) MF.getProperties().set( MachineFunctionProperties::Property::FailsVerification); + if (YamlMF.TracksDebugUserValues) + MF.getProperties().set( + MachineFunctionProperties::Property::TracksDebugUserValues); PerFunctionMIParsingState PFS(MF, SM, IRSlots, *Target); if (parseRegisterInfo(PFS, YamlMF)) diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index f1369396e37f..dc72f83ad0e4 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -219,6 +219,8 @@ void MIRPrinter::print(const MachineFunction &MF) { 
MachineFunctionProperties::Property::FailedISel); YamlMF.FailsVerification = MF.getProperties().hasProperty( MachineFunctionProperties::Property::FailsVerification); + YamlMF.TracksDebugUserValues = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::TracksDebugUserValues); convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo()); MachineModuleSlotTracker MST(&MF); diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 23c511aaa056..8c9d00d08c6a 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -193,7 +193,7 @@ void ilist_traits<MachineInstr>::transferNodesFromList(ilist_traits &FromList, void ilist_traits<MachineInstr>::deleteNode(MachineInstr *MI) { assert(!MI->getParent() && "MI is still in a block!"); - Parent->getParent()->DeleteMachineInstr(MI); + Parent->getParent()->deleteMachineInstr(MI); } MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { @@ -1038,16 +1038,15 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( // Collect a list of virtual registers killed by the terminators. SmallVector<Register, 4> KilledRegs; if (LV) - for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); - I != E; ++I) { - MachineInstr *MI = &*I; - for (MachineOperand &MO : MI->operands()) { + for (MachineInstr &MI : + llvm::make_range(getFirstInstrTerminator(), instr_end())) { + for (MachineOperand &MO : MI.operands()) { if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse() || !MO.isKill() || MO.isUndef()) continue; Register Reg = MO.getReg(); if (Register::isPhysicalRegister(Reg) || - LV->getVarInfo(Reg).removeKill(*MI)) { + LV->getVarInfo(Reg).removeKill(MI)) { KilledRegs.push_back(Reg); LLVM_DEBUG(dbgs() << "Removing terminator kill: " << MI); MO.setIsKill(false); @@ -1057,11 +1056,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( SmallVector<Register, 4> UsedRegs; if (LIS) { - for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); - I != E; ++I) { - MachineInstr *MI = &*I; - - for (const MachineOperand &MO : MI->operands()) { + for (MachineInstr &MI : + llvm::make_range(getFirstInstrTerminator(), instr_end())) { + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || MO.getReg() == 0) continue; @@ -1078,9 +1075,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( // SlotIndexes. 
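On the MIRParser change above that infers TiedOpsRewritten: a tied use only counts as rewritten when it already names the same register as its def, which is what the two-address pass normally guarantees. Roughly, with an X86-flavoured two-address add (hypothetical registers, implicit flag operands elided):

  // Not yet rewritten: the tied use %0 differs from the def %3.
  //   %3:gr32 = ADD32rr %0:gr32, %1:gr32
  // Rewritten: the tied use matches the def, so the property can be set.
  //   %3:gr32 = ADD32rr %3:gr32, %1:gr32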
SmallVector<MachineInstr*, 4> Terminators; if (Indexes) { - for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); - I != E; ++I) - Terminators.push_back(&*I); + for (MachineInstr &MI : + llvm::make_range(getFirstInstrTerminator(), instr_end())) + Terminators.push_back(&MI); } // Since we replaced all uses of Succ with NMBB, that should also be treated @@ -1091,9 +1088,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( if (Indexes) { SmallVector<MachineInstr*, 4> NewTerminators; - for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); - I != E; ++I) - NewTerminators.push_back(&*I); + for (MachineInstr &MI : + llvm::make_range(getFirstInstrTerminator(), instr_end())) + NewTerminators.push_back(&MI); for (MachineInstr *Terminator : Terminators) { if (!is_contained(NewTerminators, Terminator)) diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 8a1b4031642d..692587cd58fa 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -61,6 +61,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/CodeLayout.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -193,6 +194,11 @@ static cl::opt<unsigned> TriangleChainCount( cl::init(2), cl::Hidden); +static cl::opt<bool> EnableExtTspBlockPlacement( + "enable-ext-tsp-block-placement", cl::Hidden, cl::init(false), + cl::desc("Enable machine block placement based on the ext-tsp model, " + "optimizing I-cache utilization.")); + namespace llvm { extern cl::opt<unsigned> StaticLikelyProb; extern cl::opt<unsigned> ProfileLikelyProb; @@ -557,6 +563,15 @@ class MachineBlockPlacement : public MachineFunctionPass { /// but a local analysis would not find them. void precomputeTriangleChains(); + /// Apply a post-processing step optimizing block placement. + void applyExtTsp(); + + /// Modify the existing block placement in the function and adjust all jumps. + void assignBlockOrder(const std::vector<const MachineBasicBlock *> &NewOrder); + + /// Create a single CFG chain from the current block order. + void createCFGChainExtTsp(); + public: static char ID; // Pass identification, replacement for typeid @@ -3387,6 +3402,15 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { } } + // Apply a post-processing optimizing block placement. + if (MF.size() >= 3 && EnableExtTspBlockPlacement) { + // Find a new placement and modify the layout of the blocks in the function. + applyExtTsp(); + + // Re-create CFG chain so that we can optimizeBranches and alignBlocks. + createCFGChainExtTsp(); + } + optimizeBranches(); alignBlocks(); @@ -3413,12 +3437,147 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { MBFI->view("MBP." + MF.getName(), false); } - // We always return true as we have no way to track whether the final order // differs from the original order. return true; } +void MachineBlockPlacement::applyExtTsp() { + // Prepare data; blocks are indexed by their index in the current ordering. 
+ DenseMap<const MachineBasicBlock *, uint64_t> BlockIndex; + BlockIndex.reserve(F->size()); + std::vector<const MachineBasicBlock *> CurrentBlockOrder; + CurrentBlockOrder.reserve(F->size()); + size_t NumBlocks = 0; + for (const MachineBasicBlock &MBB : *F) { + BlockIndex[&MBB] = NumBlocks++; + CurrentBlockOrder.push_back(&MBB); + } + + auto BlockSizes = std::vector<uint64_t>(F->size()); + auto BlockCounts = std::vector<uint64_t>(F->size()); + DenseMap<std::pair<uint64_t, uint64_t>, uint64_t> JumpCounts; + for (MachineBasicBlock &MBB : *F) { + // Getting the block frequency. + BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB); + BlockCounts[BlockIndex[&MBB]] = BlockFreq.getFrequency(); + // Getting the block size: + // - approximate the size of an instruction by 4 bytes, and + // - ignore debug instructions. + // Note: getting the exact size of each block is target-dependent and can be + // done by extending the interface of MCCodeEmitter. Experimentally we do + // not see a perf improvement with the exact block sizes. + auto NonDbgInsts = + instructionsWithoutDebug(MBB.instr_begin(), MBB.instr_end()); + int NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end()); + BlockSizes[BlockIndex[&MBB]] = 4 * NumInsts; + // Getting jump frequencies. + for (MachineBasicBlock *Succ : MBB.successors()) { + auto EP = MBPI->getEdgeProbability(&MBB, Succ); + BlockFrequency EdgeFreq = BlockFreq * EP; + auto Edge = std::make_pair(BlockIndex[&MBB], BlockIndex[Succ]); + JumpCounts[Edge] = EdgeFreq.getFrequency(); + } + } + + LLVM_DEBUG(dbgs() << "Applying ext-tsp layout for |V| = " << F->size() + << " with profile = " << F->getFunction().hasProfileData() + << " (" << F->getName().str() << ")" + << "\n"); + LLVM_DEBUG( + dbgs() << format(" original layout score: %0.2f\n", + calcExtTspScore(BlockSizes, BlockCounts, JumpCounts))); + + // Run the layout algorithm. + auto NewOrder = applyExtTspLayout(BlockSizes, BlockCounts, JumpCounts); + std::vector<const MachineBasicBlock *> NewBlockOrder; + NewBlockOrder.reserve(F->size()); + for (uint64_t Node : NewOrder) { + NewBlockOrder.push_back(CurrentBlockOrder[Node]); + } + LLVM_DEBUG(dbgs() << format(" optimized layout score: %0.2f\n", + calcExtTspScore(NewOrder, BlockSizes, BlockCounts, + JumpCounts))); + + // Assign new block order. + assignBlockOrder(NewBlockOrder); +} + +void MachineBlockPlacement::assignBlockOrder( + const std::vector<const MachineBasicBlock *> &NewBlockOrder) { + assert(F->size() == NewBlockOrder.size() && "Incorrect size of block order"); + F->RenumberBlocks(); + + bool HasChanges = false; + for (size_t I = 0; I < NewBlockOrder.size(); I++) { + if (NewBlockOrder[I] != F->getBlockNumbered(I)) { + HasChanges = true; + break; + } + } + // Stop early if the new block order is identical to the existing one. + if (!HasChanges) + return; + + SmallVector<MachineBasicBlock *, 4> PrevFallThroughs(F->getNumBlockIDs()); + for (auto &MBB : *F) { + PrevFallThroughs[MBB.getNumber()] = MBB.getFallThrough(); + } + + // Sort basic blocks in the function according to the computed order. + DenseMap<const MachineBasicBlock *, size_t> NewIndex; + for (const MachineBasicBlock *MBB : NewBlockOrder) { + NewIndex[MBB] = NewIndex.size(); + } + F->sort([&](MachineBasicBlock &L, MachineBasicBlock &R) { + return NewIndex[&L] < NewIndex[&R]; + }); + + // Update basic block branches by inserting explicit fallthrough branches + // when required and re-optimize branches when possible. 
+ const TargetInstrInfo *TII = F->getSubtarget().getInstrInfo(); + SmallVector<MachineOperand, 4> Cond; + for (auto &MBB : *F) { + MachineFunction::iterator NextMBB = std::next(MBB.getIterator()); + MachineFunction::iterator EndIt = MBB.getParent()->end(); + auto *FTMBB = PrevFallThroughs[MBB.getNumber()]; + // If this block had a fallthrough before we need an explicit unconditional + // branch to that block if the fallthrough block is not adjacent to the + // block in the new order. + if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) { + TII->insertUnconditionalBranch(MBB, FTMBB, MBB.findBranchDebugLoc()); + } + + // It might be possible to optimize branches by flipping the condition. + Cond.clear(); + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + if (TII->analyzeBranch(MBB, TBB, FBB, Cond)) + continue; + MBB.updateTerminator(FTMBB); + } + +#ifndef NDEBUG + // Make sure we correctly constructed all branches. + F->verify(this, "After optimized block reordering"); +#endif +} + +void MachineBlockPlacement::createCFGChainExtTsp() { + BlockToChain.clear(); + ComputedEdges.clear(); + ChainAllocator.DestroyAll(); + + MachineBasicBlock *HeadBB = &F->front(); + BlockChain *FunctionChain = + new (ChainAllocator.Allocate()) BlockChain(BlockToChain, HeadBB); + + for (MachineBasicBlock &MBB : *F) { + if (HeadBB == &MBB) + continue; // Ignore head of the chain + FunctionChain->merge(&MBB, nullptr); + } +} + namespace { /// A pass to compute block placement statistics. diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp index e2b6cfe55c16..72ab9ee4f388 100644 --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -485,7 +485,7 @@ static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr); for (auto *InstrPtr : DelInstrs) { - InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval(); + InstrPtr->eraseFromParent(); // Erase all LiveRegs defined by the removed instruction for (auto I = RegUnits.begin(); I != RegUnits.end(); ) { if (I->MI == InstrPtr) @@ -693,7 +693,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { // use for them. MachineFunction *MF = MBB->getParent(); for (auto *InstrPtr : InsInstrs) - MF->DeleteMachineInstr(InstrPtr); + MF->deleteMachineInstr(InstrPtr); } InstrIdxForVirtReg.clear(); } diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 7c83bacd80d9..57fbe4112e47 100644 --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -847,31 +847,27 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( LLVM_DEBUG(dbgs() << "MCP: BackwardCopyPropagateBlock " << MBB.getName() << "\n"); - for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend(); - I != E;) { - MachineInstr *MI = &*I; - ++I; - + for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) { // Ignore non-trivial COPYs. 
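Stepping back to the ext-tsp layout added to MachineBlockPlacement.cpp above: it reuses calcExtTspScore and applyExtTspLayout from the newly included llvm/Transforms/Utils/CodeLayout.h, and since EnableExtTspBlockPlacement defaults to false it only runs when requested explicitly. A hypothetical invocation (file names are placeholders):

  llc -O2 -enable-ext-tsp-block-placement foo.ll -o foo.s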
- if (MI->isCopy() && MI->getNumOperands() == 2 && - !TRI->regsOverlap(MI->getOperand(0).getReg(), - MI->getOperand(1).getReg())) { + if (MI.isCopy() && MI.getNumOperands() == 2 && + !TRI->regsOverlap(MI.getOperand(0).getReg(), + MI.getOperand(1).getReg())) { - MCRegister Def = MI->getOperand(0).getReg().asMCReg(); - MCRegister Src = MI->getOperand(1).getReg().asMCReg(); + MCRegister Def = MI.getOperand(0).getReg().asMCReg(); + MCRegister Src = MI.getOperand(1).getReg().asMCReg(); // Unlike forward cp, we don't invoke propagateDefs here, // just let forward cp do COPY-to-COPY propagation. - if (isBackwardPropagatableCopy(*MI, *MRI)) { + if (isBackwardPropagatableCopy(MI, *MRI)) { Tracker.invalidateRegister(Src, *TRI); Tracker.invalidateRegister(Def, *TRI); - Tracker.trackCopy(MI, *TRI); + Tracker.trackCopy(&MI, *TRI); continue; } } // Invalidate any earlyclobber regs first. - for (const MachineOperand &MO : MI->operands()) + for (const MachineOperand &MO : MI.operands()) if (MO.isReg() && MO.isEarlyClobber()) { MCRegister Reg = MO.getReg().asMCReg(); if (!Reg) @@ -879,8 +875,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( Tracker.invalidateRegister(Reg, *TRI); } - propagateDefs(*MI); - for (const MachineOperand &MO : MI->operands()) { + propagateDefs(MI); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; @@ -898,7 +894,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( for (MCRegUnitIterator RUI(MO.getReg().asMCReg(), TRI); RUI.isValid(); ++RUI) { if (auto *Copy = Tracker.findCopyDefViaUnit(*RUI, *TRI)) { - CopyDbgUsers[Copy].insert(MI); + CopyDbgUsers[Copy].insert(&MI); } } } else { diff --git a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp new file mode 100644 index 000000000000..42a5e2b7af01 --- /dev/null +++ b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp @@ -0,0 +1,113 @@ +//===- MachineCycleAnalysis.cpp - Compute CycleInfo for Machine IR --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineCycleAnalysis.h" +#include "llvm/ADT/GenericCycleImpl.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineSSAContext.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +template class llvm::GenericCycleInfo<llvm::MachineSSAContext>; +template class llvm::GenericCycle<llvm::MachineSSAContext>; + +namespace { + +/// Legacy analysis pass which computes a \ref MachineCycleInfo. 
+class MachineCycleInfoWrapperPass : public MachineFunctionPass { + MachineFunction *F = nullptr; + MachineCycleInfo CI; + +public: + static char ID; + + MachineCycleInfoWrapperPass(); + + MachineCycleInfo &getCycleInfo() { return CI; } + const MachineCycleInfo &getCycleInfo() const { return CI; } + + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; + void print(raw_ostream &OS, const Module *M = nullptr) const override; + + // TODO: verify analysis +}; + +class MachineCycleInfoPrinterPass : public MachineFunctionPass { +public: + static char ID; + + MachineCycleInfoPrinterPass(); + + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +} // namespace + +char MachineCycleInfoWrapperPass::ID = 0; + +MachineCycleInfoWrapperPass::MachineCycleInfoWrapperPass() + : MachineFunctionPass(ID) { + initializeMachineCycleInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +INITIALIZE_PASS_BEGIN(MachineCycleInfoWrapperPass, "machine-cycles", + "Machine Cycle Info Analysis", true, true) +INITIALIZE_PASS_END(MachineCycleInfoWrapperPass, "machine-cycles", + "Machine Cycle Info Analysis", true, true) + +void MachineCycleInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineCycleInfoWrapperPass::runOnMachineFunction(MachineFunction &Func) { + CI.clear(); + + F = &Func; + CI.compute(Func); + return false; +} + +void MachineCycleInfoWrapperPass::print(raw_ostream &OS, const Module *) const { + OS << "MachineCycleInfo for function: " << F->getName() << "\n"; + CI.print(OS); +} + +void MachineCycleInfoWrapperPass::releaseMemory() { + CI.clear(); + F = nullptr; +} + +char MachineCycleInfoPrinterPass::ID = 0; + +MachineCycleInfoPrinterPass::MachineCycleInfoPrinterPass() + : MachineFunctionPass(ID) { + initializeMachineCycleInfoPrinterPassPass(*PassRegistry::getPassRegistry()); +} + +INITIALIZE_PASS_BEGIN(MachineCycleInfoPrinterPass, "print-machine-cycles", + "Print Machine Cycle Info Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass) +INITIALIZE_PASS_END(MachineCycleInfoPrinterPass, "print-machine-cycles", + "Print Machine Cycle Info Analysis", true, true) + +void MachineCycleInfoPrinterPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineCycleInfoWrapperPass>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineCycleInfoPrinterPass::runOnMachineFunction(MachineFunction &F) { + auto &CI = getAnalysis<MachineCycleInfoWrapperPass>(); + CI.print(errs()); + return false; +} diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 310c2721c3bd..81ed3d0e93ff 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -89,6 +89,7 @@ static cl::opt<unsigned> AlignAllFunctions( static const char *getPropertyName(MachineFunctionProperties::Property Prop) { using P = MachineFunctionProperties::Property; + // clang-format off switch(Prop) { case P::FailedISel: return "FailedISel"; case P::IsSSA: return "IsSSA"; @@ -100,7 +101,9 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) { case P::TracksLiveness: return "TracksLiveness"; case P::TiedOpsRewritten: return "TiedOpsRewritten"; case P::FailsVerification: return "FailsVerification"; + case P::TracksDebugUserValues: return 
"TracksDebugUserValues"; } + // clang-format on llvm_unreachable("Invalid machine function property"); } @@ -125,7 +128,7 @@ void MachineFunctionProperties::print(raw_ostream &OS) const { MachineFunctionInfo::~MachineFunctionInfo() = default; void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { - MBB->getParent()->DeleteMachineBasicBlock(MBB); + MBB->getParent()->deleteMachineBasicBlock(MBB); } static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI, @@ -347,10 +350,10 @@ void MachineFunction::assignBeginEndSections() { /// Allocate a new MachineInstr. Use this instead of `new MachineInstr'. MachineInstr *MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID, - const DebugLoc &DL, + DebugLoc DL, bool NoImplicit) { return new (InstructionRecycler.Allocate<MachineInstr>(Allocator)) - MachineInstr(*this, MCID, DL, NoImplicit); + MachineInstr(*this, MCID, std::move(DL), NoImplicit); } /// Create a new MachineInstr which is a copy of the 'Orig' instruction, @@ -361,8 +364,9 @@ MachineFunction::CloneMachineInstr(const MachineInstr *Orig) { MachineInstr(*this, *Orig); } -MachineInstr &MachineFunction::CloneMachineInstrBundle(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) { +MachineInstr &MachineFunction::cloneMachineInstrBundle( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, + const MachineInstr &Orig) { MachineInstr *FirstClone = nullptr; MachineBasicBlock::const_instr_iterator I = Orig.getIterator(); while (true) { @@ -390,8 +394,7 @@ MachineInstr &MachineFunction::CloneMachineInstrBundle(MachineBasicBlock &MBB, /// /// This function also serves as the MachineInstr destructor - the real /// ~MachineInstr() destructor must be empty. -void -MachineFunction::DeleteMachineInstr(MachineInstr *MI) { +void MachineFunction::deleteMachineInstr(MachineInstr *MI) { // Verify that a call site info is at valid state. This assertion should // be triggered during the implementation of support for the // call site info of a new architecture. If the assertion is triggered, @@ -418,8 +421,7 @@ MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) { } /// Delete the given MachineBasicBlock. -void -MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { +void MachineFunction::deleteMachineBasicBlock(MachineBasicBlock *MBB) { assert(MBB->getParent() == this && "MBB parent mismatch!"); // Clean up any references to MBB in jump tables before deleting it. if (JumpTableInfo) @@ -769,8 +771,8 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) { void MachineFunction::addCatchTypeInfo(MachineBasicBlock *LandingPad, ArrayRef<const GlobalValue *> TyInfo) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - for (unsigned N = TyInfo.size(); N; --N) - LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1])); + for (const GlobalValue *GV : llvm::reverse(TyInfo)) + LP.TypeIds.push_back(getTypeIDFor(GV)); } void MachineFunction::addFilterTypeInfo(MachineBasicBlock *LandingPad, @@ -1404,10 +1406,10 @@ MachineConstantPool::~MachineConstantPool() { // A constant may be a member of both Constants and MachineCPVsSharingEntries, // so keep track of which we've deleted to avoid double deletions. 
DenseSet<MachineConstantPoolValue*> Deleted; - for (unsigned i = 0, e = Constants.size(); i != e; ++i) - if (Constants[i].isMachineConstantPoolEntry()) { - Deleted.insert(Constants[i].Val.MachineCPVal); - delete Constants[i].Val.MachineCPVal; + for (const MachineConstantPoolEntry &C : Constants) + if (C.isMachineConstantPoolEntry()) { + Deleted.insert(C.Val.MachineCPVal); + delete C.Val.MachineCPVal; } for (MachineConstantPoolValue *CPV : MachineCPVsSharingEntries) { if (Deleted.count(CPV) == 0) diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index aaa80432d2f2..85b266afceef 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -115,10 +115,10 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { /// MachineInstr ctor - This constructor creates a MachineInstr and adds the /// implicit operands. It reserves space for the number of operands specified by /// the MCInstrDesc. -MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, - DebugLoc dl, bool NoImp) - : MCID(&tid), debugLoc(std::move(dl)), DebugInstrNum(0) { - assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); +MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &TID, + DebugLoc DL, bool NoImp) + : MCID(&TID), DbgLoc(std::move(DL)), DebugInstrNum(0) { + assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor"); // Reserve space for the expected number of operands. if (unsigned NumOps = MCID->getNumOperands() + @@ -135,9 +135,9 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, /// Does not copy the number from debug instruction numbering, to preserve /// uniqueness. MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : MCID(&MI.getDesc()), Info(MI.Info), debugLoc(MI.getDebugLoc()), + : MCID(&MI.getDesc()), Info(MI.Info), DbgLoc(MI.getDebugLoc()), DebugInstrNum(0) { - assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); + assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor"); CapOperands = OperandCapacity::get(MI.getNumOperands()); Operands = MF.allocateOperandArray(CapOperands); @@ -682,26 +682,6 @@ void MachineInstr::eraseFromParent() { getParent()->erase(this); } -void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() { - assert(getParent() && "Not embedded in a basic block!"); - MachineBasicBlock *MBB = getParent(); - MachineFunction *MF = MBB->getParent(); - assert(MF && "Not embedded in a function!"); - - MachineInstr *MI = (MachineInstr *)this; - MachineRegisterInfo &MRI = MF->getRegInfo(); - - for (const MachineOperand &MO : MI->operands()) { - if (!MO.isReg() || !MO.isDef()) - continue; - Register Reg = MO.getReg(); - if (!Reg.isVirtual()) - continue; - MRI.markUsesInDebugValueAsUndef(Reg); - } - MI->eraseFromParent(); -} - void MachineInstr::eraseFromBundle() { assert(getParent() && "Not embedded in a basic block!"); getParent()->erase_instr(this); diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 8d6459a627fa..762395542b40 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -649,7 +649,7 @@ void SwingSchedulerDAG::schedule() { /// Clean up after the software pipeliner runs. void SwingSchedulerDAG::finishBlock() { for (auto &KV : NewMIs) - MF.DeleteMachineInstr(KV.second); + MF.deleteMachineInstr(KV.second); NewMIs.clear(); // Call the superclass. 
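// Illustrative sketch, not part of this patch: the "remember what was already
// freed" pattern used by the MachineConstantPool destructor above, with plain
// std types and hypothetical names.
#include <unordered_set>
#include <vector>

struct Node { int Payload = 0; };

static void destroyAll(const std::vector<Node *> &Primary,
                       const std::vector<Node *> &MaybeShared) {
  std::unordered_set<Node *> Deleted;
  for (Node *N : Primary) { // the first owner deletes and records each pointer
    Deleted.insert(N);
    delete N;
  }
  for (Node *N : MaybeShared) // the second owner skips anything already freed
    if (Deleted.count(N) == 0)
      delete N;
}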
@@ -1101,17 +1101,15 @@ unsigned SwingSchedulerDAG::calculateResMII() { // Sort the instructions by the number of available choices for scheduling, // least to most. Use the number of critical resources as the tie breaker. FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget()); - for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(), - E = MBB->getFirstTerminator(); - I != E; ++I) - FUS.calcCriticalResources(*I); + for (MachineInstr &MI : + llvm::make_range(MBB->getFirstNonPHI(), MBB->getFirstTerminator())) + FUS.calcCriticalResources(MI); PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter> FuncUnitOrder(FUS); - for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(), - E = MBB->getFirstTerminator(); - I != E; ++I) - FuncUnitOrder.push(&*I); + for (MachineInstr &MI : + llvm::make_range(MBB->getFirstNonPHI(), MBB->getFirstTerminator())) + FuncUnitOrder.push(&MI); while (!FuncUnitOrder.empty()) { MachineInstr *MI = FuncUnitOrder.top(); @@ -1192,14 +1190,10 @@ unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) { /// but we do this to find the circuits, and then change them back. static void swapAntiDependences(std::vector<SUnit> &SUnits) { SmallVector<std::pair<SUnit *, SDep>, 8> DepsAdded; - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - SUnit *SU = &SUnits[i]; - for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end(); - IP != EP; ++IP) { - if (IP->getKind() != SDep::Anti) - continue; - DepsAdded.push_back(std::make_pair(SU, *IP)); - } + for (SUnit &SU : SUnits) { + for (SDep &Pred : SU.Preds) + if (Pred.getKind() == SDep::Anti) + DepsAdded.push_back(std::make_pair(&SU, Pred)); } for (std::pair<SUnit *, SDep> &P : DepsAdded) { // Remove this anti dependency and add one in the reverse direction. @@ -1471,27 +1465,23 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { } // Compute ALAP, ZeroLatencyHeight, and MOV. - for (ScheduleDAGTopologicalSort::const_reverse_iterator I = Topo.rbegin(), - E = Topo.rend(); - I != E; ++I) { + for (int I : llvm::reverse(Topo)) { int alap = maxASAP; int zeroLatencyHeight = 0; - SUnit *SU = &SUnits[*I]; - for (SUnit::const_succ_iterator IS = SU->Succs.begin(), - ES = SU->Succs.end(); - IS != ES; ++IS) { - SUnit *succ = IS->getSUnit(); - if (IS->getLatency() == 0) + SUnit *SU = &SUnits[I]; + for (const SDep &S : SU->Succs) { + SUnit *succ = S.getSUnit(); + if (S.getLatency() == 0) zeroLatencyHeight = std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1); - if (ignoreDependence(*IS, true)) + if (ignoreDependence(S, true)) continue; - alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() + - getDistance(SU, succ, *IS) * MII)); + alap = std::min(alap, (int)(getALAP(succ) - S.getLatency() + + getDistance(SU, succ, S) * MII)); } - ScheduleInfo[*I].ALAP = alap; - ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight; + ScheduleInfo[I].ALAP = alap; + ScheduleInfo[I].ZeroLatencyHeight = zeroLatencyHeight; } // After computing the node functions, compute the summary for each node set. 
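// Illustrative sketch, not part of this patch: the ALAP recurrence computed
// above, on a toy DAG whose nodes are numbered in topological order. The
// loop-carried term (getDistance(...) * MII) and the ignoreDependence()
// filtering are omitted; names are hypothetical.
#include <algorithm>
#include <vector>

struct ToyEdge {
  int Succ;    // successor node index
  int Latency; // edge latency
};

static std::vector<int> computeALAP(const std::vector<std::vector<ToyEdge>> &Succs,
                                    int MaxASAP) {
  std::vector<int> ALAP(Succs.size(), MaxASAP);
  // Walk nodes in reverse topological order so every successor is finalized
  // before its predecessors are visited.
  for (int I = static_cast<int>(Succs.size()) - 1; I >= 0; --I)
    for (const ToyEdge &E : Succs[I])
      ALAP[I] = std::min(ALAP[I], ALAP[E.Succ] - E.Latency);
  return ALAP;
}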
@@ -1548,9 +1538,8 @@ static bool succ_L(SetVector<SUnit *> &NodeOrder, SmallSetVector<SUnit *, 8> &Succs, const NodeSet *S = nullptr) { Succs.clear(); - for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end(); - I != E; ++I) { - for (SDep &Succ : (*I)->Succs) { + for (const SUnit *SU : NodeOrder) { + for (const SDep &Succ : SU->Succs) { if (S && S->count(Succ.getSUnit()) == 0) continue; if (ignoreDependence(Succ, false)) @@ -1558,7 +1547,7 @@ static bool succ_L(SetVector<SUnit *> &NodeOrder, if (NodeOrder.count(Succ.getSUnit()) == 0) Succs.insert(Succ.getSUnit()); } - for (SDep &Pred : (*I)->Preds) { + for (const SDep &Pred : SU->Preds) { if (Pred.getKind() != SDep::Anti) continue; if (S && S->count(Pred.getSUnit()) == 0) @@ -2202,7 +2191,7 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI, MachineInstr *NewMI = MF.CloneMachineInstr(MI); NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset); bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef); - MF.DeleteMachineInstr(NewMI); + MF.deleteMachineInstr(NewMI); if (!Disjoint) return false; @@ -2885,10 +2874,8 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) { ++stage) { std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[cycle + (stage * InitiationInterval)]; - for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(), - E = cycleInstrs.rend(); - I != E; ++I) - ScheduledInstrs[cycle].push_front(*I); + for (SUnit *SU : llvm::reverse(cycleInstrs)) + ScheduledInstrs[cycle].push_front(SU); } } @@ -2899,10 +2886,8 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) { // Change the registers in instruction as specified in the InstrChanges // map. We need to use the new registers to create the correct order. - for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) { - SUnit *SU = &SSD->SUnits[i]; - SSD->applyInstrChange(SU->getInstr(), *this); - } + for (const SUnit &SU : SSD->SUnits) + SSD->applyInstrChange(SU.getInstr(), *this); // Reorder the instructions in each cycle to fix and improve the // generated code. diff --git a/llvm/lib/CodeGen/MachineSSAContext.cpp b/llvm/lib/CodeGen/MachineSSAContext.cpp new file mode 100644 index 000000000000..8db893535daf --- /dev/null +++ b/llvm/lib/CodeGen/MachineSSAContext.cpp @@ -0,0 +1,52 @@ +//===- MachineSSAContext.cpp ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a specialization of the GenericSSAContext<X> +/// template class for Machine IR. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineSSAContext.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +MachineBasicBlock *MachineSSAContext::getEntryBlock(MachineFunction &F) { + return &F.front(); +} + +void MachineSSAContext::setFunction(MachineFunction &Fn) { + MF = &Fn; + RegInfo = &MF->getRegInfo(); +} + +Printable MachineSSAContext::print(MachineBasicBlock *Block) const { + return Printable([Block](raw_ostream &Out) { Block->printName(Out); }); +} + +Printable MachineSSAContext::print(MachineInstr *I) const { + return Printable([I](raw_ostream &Out) { I->print(Out); }); +} + +Printable MachineSSAContext::print(Register Value) const { + auto *MRI = RegInfo; + return Printable([MRI, Value](raw_ostream &Out) { + Out << printReg(Value, MRI->getTargetRegisterInfo(), 0, MRI); + + if (Value) { + // Try to print the definition. + if (auto *Instr = MRI->getUniqueVRegDef(Value)) { + Out << ": "; + Instr->print(Out); + } + } + }); +} diff --git a/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/llvm/lib/CodeGen/MachineSSAUpdater.cpp index 930677e4fd7d..48076663ddf5 100644 --- a/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -126,7 +126,9 @@ MachineInstrBuilder InsertNewDef(unsigned Opcode, } /// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that -/// is live in the middle of the specified block. +/// is live in the middle of the specified block. If ExistingValueOnly is +/// true then this will only return an existing value or $noreg; otherwise new +/// instructions may be inserted to materialize a value. /// /// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one /// important case: if there is a definition of the rewritten value after the @@ -143,14 +145,18 @@ MachineInstrBuilder InsertNewDef(unsigned Opcode, /// their respective blocks. However, the use of X happens in the *middle* of /// a block. Because of this, we need to insert a new PHI node in SomeBB to /// merge the appropriate values, and this value isn't live out of the block. -Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { +Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB, + bool ExistingValueOnly) { // If there is no definition of the renamed variable in this block, just use // GetValueAtEndOfBlock to do our work. if (!HasValueForBlock(BB)) - return GetValueAtEndOfBlockInternal(BB); + return GetValueAtEndOfBlockInternal(BB, ExistingValueOnly); // If there are no predecessors, just return undef. if (BB->pred_empty()) { + // If we cannot insert new instructions, just return $noreg. + if (ExistingValueOnly) + return Register(); // Insert an implicit_def to represent an undef value. MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, BB, BB->getFirstTerminator(), @@ -165,7 +171,7 @@ Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { bool isFirstPred = true; for (MachineBasicBlock *PredBB : BB->predecessors()) { - Register PredVal = GetValueAtEndOfBlockInternal(PredBB); + Register PredVal = GetValueAtEndOfBlockInternal(PredBB, ExistingValueOnly); PredValues.push_back(std::make_pair(PredBB, PredVal)); // Compute SingularValue. 
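// Illustrative sketch, not part of this patch: how a caller that must not
// insert new PHIs or IMPLICIT_DEFs might use the ExistingValueOnly flag added
// above. The helper name is hypothetical.
#include "llvm/CodeGen/MachineSSAUpdater.h"

static llvm::Register lookupExistingValueAt(llvm::MachineSSAUpdater &Updater,
                                            llvm::MachineBasicBlock *MBB) {
  // With ExistingValueOnly set, no instructions are created; the result is
  // $noreg (an invalid Register) when nothing is already available here.
  return Updater.GetValueInMiddleOfBlock(MBB, /*ExistingValueOnly=*/true);
}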
@@ -185,6 +191,10 @@ Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { if (DupPHI) return DupPHI; + // If we cannot create new instructions, return $noreg now. + if (ExistingValueOnly) + return Register(); + // Otherwise, we do need a PHI: insert one now. MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin(); MachineInstrBuilder InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, @@ -350,10 +360,13 @@ public: /// for the specified BB and if so, return it. If not, construct SSA form by /// first calculating the required placement of PHIs and then inserting new /// PHIs where needed. -Register MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ +Register +MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB, + bool ExistingValueOnly) { AvailableValsTy &AvailableVals = getAvailableVals(AV); - if (Register V = AvailableVals[BB]) - return V; + Register ExistingVal = AvailableVals.lookup(BB); + if (ExistingVal || ExistingValueOnly) + return ExistingVal; SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs); return Impl.GetValue(BB); diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 47d40f0823c8..b043d4c1b0c1 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -90,12 +90,17 @@ cl::opt<bool> VerifyScheduling( "verify-misched", cl::Hidden, cl::desc("Verify machine instrs before and after machine scheduling")); +#ifndef NDEBUG +cl::opt<bool> ViewMISchedDAGs( + "view-misched-dags", cl::Hidden, + cl::desc("Pop up a window to show MISched dags after they are processed")); +#else +const bool ViewMISchedDAGs = false; +#endif // NDEBUG + } // end namespace llvm #ifndef NDEBUG -static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, - cl::desc("Pop up a window to show MISched dags after they are processed")); - /// In some situations a few uninteresting nodes depend on nearly all other /// nodes in the graph, provide a cutoff to hide them. static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, @@ -111,7 +116,6 @@ static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden, static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden, cl::desc("Print schedule DAGs")); #else -static const bool ViewMISchedDAGs = false; static const bool PrintDAGs = false; #endif // NDEBUG @@ -561,11 +565,10 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, MBBRegionsVector MBBRegions; getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown()); - for (MBBRegionsVector::iterator R = MBBRegions.begin(); - R != MBBRegions.end(); ++R) { - MachineBasicBlock::iterator I = R->RegionBegin; - MachineBasicBlock::iterator RegionEnd = R->RegionEnd; - unsigned NumRegionInstrs = R->NumRegionInstrs; + for (const SchedRegion &R : MBBRegions) { + MachineBasicBlock::iterator I = R.RegionBegin; + MachineBasicBlock::iterator RegionEnd = R.RegionEnd; + unsigned NumRegionInstrs = R.NumRegionInstrs; // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. 
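// Illustrative sketch, not part of this patch: why the MachineSSAUpdater hunk
// above switches from AvailableVals[BB] to AvailableVals.lookup(BB).
// DenseMap::lookup() answers a query without inserting a default-constructed
// entry on a miss, whereas operator[] grows the map on every miss.
#include "llvm/ADT/DenseMap.h"
#include <cassert>

int main() {
  llvm::DenseMap<int, unsigned> M;
  assert(M.lookup(42) == 0u && M.empty()); // pure query, map unchanged
  (void)M[42];                             // operator[] inserts a default 0
  assert(M.size() == 1);
  return 0;
}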
diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 8df23b781ffd..0a5ff276fedc 100644 --- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -80,9 +80,9 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { void MachineTraceMetrics::releaseMemory() { MF = nullptr; BlockInfo.clear(); - for (unsigned i = 0; i != TS_NumStrategies; ++i) { - delete Ensembles[i]; - Ensembles[i] = nullptr; + for (Ensemble *&E : Ensembles) { + delete E; + E = nullptr; } } @@ -398,9 +398,9 @@ void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) { LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB) << '\n'); BlockInfo[MBB->getNumber()].invalidate(); - for (unsigned i = 0; i != TS_NumStrategies; ++i) - if (Ensembles[i]) - Ensembles[i]->invalidate(MBB); + for (Ensemble *E : Ensembles) + if (E) + E->invalidate(MBB); } void MachineTraceMetrics::verifyAnalysis() const { @@ -408,9 +408,9 @@ void MachineTraceMetrics::verifyAnalysis() const { return; #ifndef NDEBUG assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size"); - for (unsigned i = 0; i != TS_NumStrategies; ++i) - if (Ensembles[i]) - Ensembles[i]->verify(); + for (Ensemble *E : Ensembles) + if (E) + E->verify(); #endif } @@ -984,8 +984,7 @@ addLiveIns(const MachineInstr *DefMI, unsigned DefOp, const MachineBasicBlock *DefMBB = DefMI->getParent(); // Reg is live-in to all blocks in Trace that follow DefMBB. - for (unsigned i = Trace.size(); i; --i) { - const MachineBasicBlock *MBB = Trace[i-1]; + for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) { if (MBB == DefMBB) return; TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; @@ -1204,8 +1203,8 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { for (unsigned K = 0; K != PRDepths.size(); ++K) PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]); } else { - for (unsigned K = 0; K != PRDepths.size(); ++K) - PRMax = std::max(PRMax, PRDepths[K]); + for (unsigned PRD : PRDepths) + PRMax = std::max(PRMax, PRD); } // Convert to cycle count. PRMax = TE.MTM.getCycles(PRMax); diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 32078db76cf3..005d4ad1a328 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -101,6 +101,7 @@ namespace { // Avoid querying the MachineFunctionProperties for each operand. 
bool isFunctionRegBankSelected; bool isFunctionSelected; + bool isFunctionTracksDebugUserValues; using RegVector = SmallVector<Register, 16>; using RegMaskVector = SmallVector<const uint32_t *, 4>; @@ -384,6 +385,8 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) { MachineFunctionProperties::Property::RegBankSelected); isFunctionSelected = MF.getProperties().hasProperty( MachineFunctionProperties::Property::Selected); + isFunctionTracksDebugUserValues = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::TracksDebugUserValues); LiveVars = nullptr; LiveInts = nullptr; @@ -1605,12 +1608,16 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { } break; } + case TargetOpcode::G_SHL: + case TargetOpcode::G_LSHR: + case TargetOpcode::G_ASHR: case TargetOpcode::G_ROTR: case TargetOpcode::G_ROTL: { LLT Src1Ty = MRI->getType(MI->getOperand(1).getReg()); LLT Src2Ty = MRI->getType(MI->getOperand(2).getReg()); if (Src1Ty.isVector() != Src2Ty.isVector()) { - report("Rotate requires operands to be either all scalars or all vectors", + report("Shifts and rotates require operands to be either all scalars or " + "all vectors", MI); break; } @@ -1980,41 +1987,50 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MO->isUndef()) report("Generic virtual register use cannot be undef", MO, MONum); - // If we're post-Select, we can't have gvregs anymore. - if (isFunctionSelected) { - report("Generic virtual register invalid in a Selected function", - MO, MONum); - return; - } + // Debug value instruction is permitted to use undefined vregs. + // This is a performance measure to skip the overhead of immediately + // pruning unused debug operands. The final undef substitution occurs + // when debug values are allocated in LDVImpl::handleDebugValue, so + // these verifications always apply after this pass. + if (isFunctionTracksDebugUserValues || !MO->isUse() || + !MI->isDebugValue() || !MRI->def_empty(Reg)) { + // If we're post-Select, we can't have gvregs anymore. + if (isFunctionSelected) { + report("Generic virtual register invalid in a Selected function", + MO, MONum); + return; + } - // The gvreg must have a type and it must not have a SubIdx. - LLT Ty = MRI->getType(Reg); - if (!Ty.isValid()) { - report("Generic virtual register must have a valid type", MO, - MONum); - return; - } + // The gvreg must have a type and it must not have a SubIdx. + LLT Ty = MRI->getType(Reg); + if (!Ty.isValid()) { + report("Generic virtual register must have a valid type", MO, + MONum); + return; + } - const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg); + const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg); - // If we're post-RegBankSelect, the gvreg must have a bank. - if (!RegBank && isFunctionRegBankSelected) { - report("Generic virtual register must have a bank in a " - "RegBankSelected function", - MO, MONum); - return; - } + // If we're post-RegBankSelect, the gvreg must have a bank. + if (!RegBank && isFunctionRegBankSelected) { + report("Generic virtual register must have a bank in a " + "RegBankSelected function", + MO, MONum); + return; + } - // Make sure the register fits into its register bank if any. 
- if (RegBank && Ty.isValid() && - RegBank->getSize() < Ty.getSizeInBits()) { - report("Register bank is too small for virtual register", MO, - MONum); - errs() << "Register bank " << RegBank->getName() << " too small(" - << RegBank->getSize() << ") to fit " << Ty.getSizeInBits() - << "-bits\n"; - return; + // Make sure the register fits into its register bank if any. + if (RegBank && Ty.isValid() && + RegBank->getSize() < Ty.getSizeInBits()) { + report("Register bank is too small for virtual register", MO, + MONum); + errs() << "Register bank " << RegBank->getName() << " too small(" + << RegBank->getSize() << ") to fit " << Ty.getSizeInBits() + << "-bits\n"; + return; + } } + if (SubIdx) { report("Generic virtual register does not allow subregister index", MO, MONum); @@ -2217,8 +2233,8 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (LiveInts && Reg.isVirtual()) { if (LiveInts->hasInterval(Reg)) { LI = &LiveInts->getInterval(Reg); - if (SubRegIdx != 0 && !LI->empty() && !LI->hasSubRanges() && - MRI->shouldTrackSubRegLiveness(Reg)) + if (SubRegIdx != 0 && (MO->isDef() || !MO->isUndef()) && !LI->empty() && + !LI->hasSubRanges() && MRI->shouldTrackSubRegLiveness(Reg)) report("Live interval for subreg operand has no subranges", MO, MONum); } else { report("Virtual register has no live interval", MO, MONum); diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp index 77a6c37e1362..7693ab417de9 100644 --- a/llvm/lib/CodeGen/PHIElimination.cpp +++ b/llvm/lib/CodeGen/PHIElimination.cpp @@ -213,7 +213,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { for (auto &I : LoweredPHIs) { if (LIS) LIS->RemoveMachineInstrFromMaps(*I.first); - MF.DeleteMachineInstr(I.first); + MF.deleteMachineInstr(I.first); } // TODO: we should use the incremental DomTree updater here. @@ -626,7 +626,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (reusedIncoming || !IncomingReg) { if (LIS) LIS->RemoveMachineInstrFromMaps(*MPhi); - MF.DeleteMachineInstr(MPhi); + MF.deleteMachineInstr(MPhi); } } diff --git a/llvm/lib/CodeGen/PostRASchedulerList.cpp b/llvm/lib/CodeGen/PostRASchedulerList.cpp index b85f00a61eac..d7cd0a583cee 100644 --- a/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -252,8 +252,8 @@ void SchedulePostRATDList::exitRegion() { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// dumpSchedule - dump the scheduled Sequence. LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const { - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) + for (const SUnit *SU : Sequence) { + if (SU) dumpNode(*SU); else dbgs() << "**** NOOP ****\n"; @@ -531,11 +531,11 @@ void SchedulePostRATDList::ListScheduleTopDown() { ReleaseSuccessors(&EntrySU); // Add all leaves to Available queue. - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + for (SUnit &SUnit : SUnits) { // It is available if it has no predecessors. 
- if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) { - AvailableQueue.push(&SUnits[i]); - SUnits[i].isAvailable = true; + if (!SUnit.NumPredsLeft && !SUnit.isAvailable) { + AvailableQueue.push(&SUnit); + SUnit.isAvailable = true; } } @@ -657,10 +657,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { #ifndef NDEBUG unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false); - unsigned Noops = 0; - for (unsigned i = 0, e = Sequence.size(); i != e; ++i) - if (!Sequence[i]) - ++Noops; + unsigned Noops = llvm::count(Sequence, nullptr); assert(Sequence.size() - Noops == ScheduledNodes && "The number of nodes scheduled doesn't match the expected number!"); #endif // NDEBUG diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 29a88480fd9f..8d8a6126dad0 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -953,12 +953,22 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // LocalStackSlotPass didn't already allocate a slot for it. // If we are told to use the LocalStackAllocationBlock, the stack protector // is expected to be already pre-allocated. - if (!MFI.getUseLocalStackAllocationBlock()) + if (MFI.getStackID(StackProtectorFI) != TargetStackID::Default) { + // If the stack protector isn't on the default stack then it's up to the + // target to set the stack offset. + assert(MFI.getObjectOffset(StackProtectorFI) != 0 && + "Offset of stack protector on non-default stack expected to be " + "already set."); + assert(!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex()) && + "Stack protector on non-default stack expected to not be " + "pre-allocated by LocalStackSlotPass."); + } else if (!MFI.getUseLocalStackAllocationBlock()) { AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset, MaxAlign, Skew); - else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex())) + } else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex())) { llvm_unreachable( "Stack protector not pre-allocated by LocalStackSlotPass."); + } // Assign large stack objects first. for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { diff --git a/llvm/lib/CodeGen/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp index f605068e076d..882f8e91bf1d 100644 --- a/llvm/lib/CodeGen/RDFGraph.cpp +++ b/llvm/lib/CodeGen/RDFGraph.cpp @@ -1500,8 +1500,8 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs, // Erase from MaxRefs all elements in the closure. auto Begin = MaxRefs.begin(); - for (unsigned i = ClosureIdx.size(); i != 0; --i) - MaxRefs.erase(Begin + ClosureIdx[i-1]); + for (unsigned Idx : llvm::reverse(ClosureIdx)) + MaxRefs.erase(Begin + Idx); } } diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp new file mode 100644 index 000000000000..9f1012c95964 --- /dev/null +++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp @@ -0,0 +1,121 @@ +//===- RegAllocEvictionAdvisor.cpp - eviction advisor ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of the default eviction advisor and of the Analysis pass. 
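// Illustrative sketch, not part of this patch: why the RDFGraph::buildPhis()
// hunk above erases MaxRefs entries in reverse index order. Assuming the
// indices were collected in ascending order (which the reverse walk requires),
// erasing from the back first keeps the still-pending smaller indices valid.
// Plain std types, hypothetical names.
#include <cassert>
#include <vector>

static void eraseByAscendingIndices(std::vector<int> &V,
                                    const std::vector<unsigned> &SortedIdx) {
  for (auto It = SortedIdx.rbegin(); It != SortedIdx.rend(); ++It)
    V.erase(V.begin() + *It); // only elements after *It shift positions
}

int main() {
  std::vector<int> V = {10, 20, 30, 40, 50};
  eraseByAscendingIndices(V, {1, 3}); // drop 20 and 40
  assert((V == std::vector<int>{10, 30, 50}));
  return 0;
}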
+// +//===----------------------------------------------------------------------===// + +#include "RegAllocEvictionAdvisor.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +static cl::opt<RegAllocEvictionAdvisorAnalysis::AdvisorMode> Mode( + "regalloc-enable-advisor", cl::Hidden, + cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), + cl::desc("Enable regalloc advisor mode"), + cl::values( + clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, + "default", "Default"), + clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, + "release", "precompiled"), + clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, + "development", "for training"))); + +static cl::opt<bool> EnableLocalReassignment( + "enable-local-reassign", cl::Hidden, + cl::desc("Local reassignment can yield better allocation decisions, but " + "may be compile time intensive"), + cl::init(false)); + +#define DEBUG_TYPE "regalloc" + +char RegAllocEvictionAdvisorAnalysis::ID = 0; +INITIALIZE_PASS(RegAllocEvictionAdvisorAnalysis, "regalloc-evict", + "Regalloc eviction policy", false, true) + +namespace { +class DefaultEvictionAdvisorAnalysis final + : public RegAllocEvictionAdvisorAnalysis { +public: + DefaultEvictionAdvisorAnalysis(bool NotAsRequested) + : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Default), + NotAsRequested(NotAsRequested) {} + + // support for isa<> and dyn_cast. + static bool classof(const RegAllocEvictionAdvisorAnalysis *R) { + return R->getAdvisorMode() == AdvisorMode::Default; + } + +private: + std::unique_ptr<RegAllocEvictionAdvisor> + getAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix, + LiveIntervals *LIS, VirtRegMap *VRM, + const RegisterClassInfo &RegClassInfo, + ExtraRegInfo *ExtraInfo) override { + return std::make_unique<DefaultEvictionAdvisor>(MF, Matrix, LIS, VRM, + RegClassInfo, ExtraInfo); + } + bool doInitialization(Module &M) override { + if (NotAsRequested) + M.getContext().emitError("Requested regalloc eviction advisor analysis " + "could be created. 
Using default"); + return RegAllocEvictionAdvisorAnalysis::doInitialization(M); + } + const bool NotAsRequested; +}; +} // namespace + +template <> Pass *llvm::callDefaultCtor<RegAllocEvictionAdvisorAnalysis>() { + Pass *Ret = nullptr; + switch (Mode) { + case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default: + Ret = new DefaultEvictionAdvisorAnalysis(/*NotAsRequested*/ false); + break; + case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development: + // TODO(mtrofin): add implementation + break; + case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release: + // TODO(mtrofin): add implementation + break; + } + if (Ret) + return Ret; + return new DefaultEvictionAdvisorAnalysis(/*NotAsRequested*/ true); +} + +StringRef RegAllocEvictionAdvisorAnalysis::getPassName() const { + switch (getAdvisorMode()) { + case AdvisorMode::Default: + return "Default Regalloc Eviction Advisor"; + case AdvisorMode::Release: + return "Release mode Regalloc Eviction Advisor"; + case AdvisorMode::Development: + return "Development mode Regalloc Eviction Advisor"; + } + llvm_unreachable("Unknown advisor kind"); +} + +RegAllocEvictionAdvisor::RegAllocEvictionAdvisor( + const MachineFunction &MF, LiveRegMatrix *Matrix, LiveIntervals *LIS, + VirtRegMap *VRM, const RegisterClassInfo &RegClassInfo, + ExtraRegInfo *ExtraInfo) + : MF(MF), Matrix(Matrix), LIS(LIS), VRM(VRM), MRI(&VRM->getRegInfo()), + TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RegClassInfo), + RegCosts(TRI->getRegisterCosts(MF)), ExtraInfo(ExtraInfo), + EnableLocalReassign(EnableLocalReassignment || + MF.getSubtarget().enableRALocalReassignment( + MF.getTarget().getOptLevel())) {} diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h index 85fd3207888b..debb75ed5020 100644 --- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h +++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/Pass.h" namespace llvm { @@ -85,6 +86,215 @@ struct EvictionCost { std::tie(O.BrokenHints, O.MaxWeight); } }; + +/// Track allocation stage and eviction loop prevention during allocation. +// TODO(mtrofin): Consider exposing RAGreedy in a header instead, and folding +// this back into it. +class ExtraRegInfo final { + // RegInfo - Keep additional information about each live range. + struct RegInfo { + LiveRangeStage Stage = RS_New; + + // Cascade - Eviction loop prevention. See + // canEvictInterferenceBasedOnCost(). + unsigned Cascade = 0; + + RegInfo() = default; + }; + + IndexedMap<RegInfo, VirtReg2IndexFunctor> Info; + unsigned NextCascade = 1; + +public: + ExtraRegInfo() = default; + ExtraRegInfo(const ExtraRegInfo &) = delete; + + LiveRangeStage getStage(Register Reg) const { return Info[Reg].Stage; } + + LiveRangeStage getStage(const LiveInterval &VirtReg) const { + return getStage(VirtReg.reg()); + } + + void setStage(Register Reg, LiveRangeStage Stage) { + Info.grow(Reg.id()); + Info[Reg].Stage = Stage; + } + + void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) { + setStage(VirtReg.reg(), Stage); + } + + /// Return the current stage of the register, if present, otherwise initialize + /// it and return that. 
+ LiveRangeStage getOrInitStage(Register Reg) { + Info.grow(Reg.id()); + return getStage(Reg); + } + + unsigned getCascade(Register Reg) const { return Info[Reg].Cascade; } + + void setCascade(Register Reg, unsigned Cascade) { + Info.grow(Reg.id()); + Info[Reg].Cascade = Cascade; + } + + unsigned getOrAssignNewCascade(Register Reg) { + unsigned Cascade = getCascade(Reg); + if (!Cascade) { + Cascade = NextCascade++; + setCascade(Reg, Cascade); + } + return Cascade; + } + + unsigned getCascadeOrCurrentNext(Register Reg) const { + unsigned Cascade = getCascade(Reg); + if (!Cascade) + Cascade = NextCascade; + return Cascade; + } + + template <typename Iterator> + void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { + for (; Begin != End; ++Begin) { + Register Reg = *Begin; + Info.grow(Reg.id()); + if (Info[Reg].Stage == RS_New) + Info[Reg].Stage = NewStage; + } + } + void LRE_DidCloneVirtReg(Register New, Register Old); +}; + +/// Interface to the eviction advisor, which is responsible for making a +/// decision as to which live ranges should be evicted (if any). +class RegAllocEvictionAdvisor { +public: + RegAllocEvictionAdvisor(const RegAllocEvictionAdvisor &) = delete; + RegAllocEvictionAdvisor(RegAllocEvictionAdvisor &&) = delete; + virtual ~RegAllocEvictionAdvisor() = default; + + /// Find a physical register that can be freed by evicting the FixedRegisters, + /// or return NoRegister. The eviction decision is assumed to be correct (i.e. + /// no fixed live ranges are evicted) and profitable. + virtual MCRegister + tryFindEvictionCandidate(LiveInterval &VirtReg, const AllocationOrder &Order, + uint8_t CostPerUseLimit, + const SmallVirtRegSet &FixedRegisters) const = 0; + + /// Find out if we can evict the live ranges occupying the given PhysReg, + /// which is a hint (preferred register) for VirtReg. + virtual bool + canEvictHintInterference(LiveInterval &VirtReg, MCRegister PhysReg, + const SmallVirtRegSet &FixedRegisters) const = 0; + + /// Returns true if the given \p PhysReg is a callee saved register and has + /// not been used for allocation yet. + bool isUnusedCalleeSavedReg(MCRegister PhysReg) const; + +protected: + RegAllocEvictionAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix, + LiveIntervals *LIS, VirtRegMap *VRM, + const RegisterClassInfo &RegClassInfo, + ExtraRegInfo *ExtraInfo); + + Register canReassign(LiveInterval &VirtReg, Register PrevReg) const; + + const MachineFunction &MF; + LiveRegMatrix *const Matrix; + LiveIntervals *const LIS; + VirtRegMap *const VRM; + MachineRegisterInfo *const MRI; + const TargetRegisterInfo *const TRI; + const RegisterClassInfo &RegClassInfo; + const ArrayRef<uint8_t> RegCosts; + ExtraRegInfo *const ExtraInfo; + + /// Run or not the local reassignment heuristic. This information is + /// obtained from the TargetSubtargetInfo. + const bool EnableLocalReassign; + +private: + unsigned NextCascade = 1; +}; + +/// ImmutableAnalysis abstraction for fetching the Eviction Advisor. We model it +/// as an analysis to decouple the user from the implementation insofar as +/// dependencies on other analyses goes. The motivation for it being an +/// immutable pass is twofold: +/// - in the ML implementation case, the evaluator is stateless but (especially +/// in the development mode) expensive to set up. With an immutable pass, we set +/// it up once. 
+/// - in the 'development' mode ML case, we want to capture the training log +/// during allocation (this is a log of features encountered and decisions +/// made), and then measure a score, potentially a few steps after allocation +/// completes. So we need the properties of an immutable pass to keep the logger +/// state around until we can make that measurement. +/// +/// Because we need to offer additional services in 'development' mode, the +/// implementations of this analysis need to implement RTTI support. +class RegAllocEvictionAdvisorAnalysis : public ImmutablePass { +public: + enum class AdvisorMode : int { Default, Release, Development }; + + RegAllocEvictionAdvisorAnalysis(AdvisorMode Mode) + : ImmutablePass(ID), Mode(Mode){}; + static char ID; + + /// Get an advisor for the given context (i.e. machine function, etc) + virtual std::unique_ptr<RegAllocEvictionAdvisor> + getAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix, + LiveIntervals *LIS, VirtRegMap *VRM, + const RegisterClassInfo &RegClassInfo, + ExtraRegInfo *ExtraInfo) = 0; + AdvisorMode getAdvisorMode() const { return Mode; } + +private: + // This analysis preserves everything, and subclasses may have additional + // requirements. + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + StringRef getPassName() const override; + const AdvisorMode Mode; +}; + +/// Specialization for the API used by the analysis infrastructure to create +/// an instance of the eviction advisor. +template <> Pass *callDefaultCtor<RegAllocEvictionAdvisorAnalysis>(); + +// TODO(mtrofin): implement these. +#ifdef LLVM_HAVE_TF_AOT +RegAllocEvictionAdvisorAnalysis *createReleaseModeAdvisor(); +#endif + +#ifdef LLVM_HAVE_TF_API +RegAllocEvictionAdvisorAnalysis *createDevelopmentModeAdvisor(); +#endif + +// TODO: move to RegAllocEvictionAdvisor.cpp when we move implementation +// out of RegAllocGreedy.cpp +class DefaultEvictionAdvisor : public RegAllocEvictionAdvisor { +public: + DefaultEvictionAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix, + LiveIntervals *LIS, VirtRegMap *VRM, + const RegisterClassInfo &RegClassInfo, + ExtraRegInfo *ExtraInfo) + : RegAllocEvictionAdvisor(MF, Matrix, LIS, VRM, RegClassInfo, ExtraInfo) { + } + +private: + MCRegister tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &, + uint8_t, + const SmallVirtRegSet &) const override; + bool canEvictHintInterference(LiveInterval &, MCRegister, + const SmallVirtRegSet &) const override; + bool canEvictInterferenceBasedOnCost(LiveInterval &, MCRegister, bool, + EvictionCost &, + const SmallVirtRegSet &) const; + bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const; +}; } // namespace llvm #endif // LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 50411c177007..ce3cf31dbd6b 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -112,12 +112,6 @@ static cl::opt<bool> ExhaustiveSearch( "and interference cutoffs of last chance recoloring"), cl::Hidden); -static cl::opt<bool> EnableLocalReassignment( - "enable-local-reassign", cl::Hidden, - cl::desc("Local reassignment can yield better allocation decisions, but " - "may be compile time intensive"), - cl::init(false)); - static cl::opt<bool> EnableDeferredSpilling( "enable-deferred-spilling", cl::Hidden, cl::desc("Instead of spilling a variable right away, defer the actual " @@ -172,8 +166,9 @@ class RAGreedy : public 
MachineFunctionPass, // state std::unique_ptr<Spiller> SpillerInstance; PQueue Queue; - unsigned NextCascade; std::unique_ptr<VirtRegAuxInfo> VRAI; + Optional<ExtraRegInfo> ExtraInfo; + std::unique_ptr<RegAllocEvictionAdvisor> EvictAdvisor; // Enum CutOffStage to keep a track whether the register allocation failed // because of the cutoffs encountered in last chance recoloring. @@ -195,76 +190,6 @@ class RAGreedy : public MachineFunctionPass, static const char *const StageName[]; #endif - // RegInfo - Keep additional information about each live range. - struct RegInfo { - LiveRangeStage Stage = RS_New; - - // Cascade - Eviction loop prevention. See - // canEvictInterferenceBasedOnCost(). - unsigned Cascade = 0; - - RegInfo() = default; - }; - - IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo; - - LiveRangeStage getStage(Register Reg) const { - return ExtraRegInfo[Reg].Stage; - } - - LiveRangeStage getStage(const LiveInterval &VirtReg) const { - return getStage(VirtReg.reg()); - } - - void setStage(Register Reg, LiveRangeStage Stage) { - ExtraRegInfo.resize(MRI->getNumVirtRegs()); - ExtraRegInfo[Reg].Stage = Stage; - } - - void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) { - setStage(VirtReg.reg(), Stage); - } - - /// Return the current stage of the register, if present, otherwise initialize - /// it and return that. - LiveRangeStage getOrInitStage(Register Reg) { - ExtraRegInfo.grow(Reg); - return getStage(Reg); - } - - unsigned getCascade(Register Reg) const { return ExtraRegInfo[Reg].Cascade; } - - void setCascade(Register Reg, unsigned Cascade) { - ExtraRegInfo.resize(MRI->getNumVirtRegs()); - ExtraRegInfo[Reg].Cascade = Cascade; - } - - unsigned getOrAssignNewCascade(Register Reg) { - unsigned Cascade = getCascade(Reg); - if (!Cascade) { - Cascade = NextCascade++; - setCascade(Reg, Cascade); - } - return Cascade; - } - - unsigned getCascadeOrCurrentNext(Register Reg) const { - unsigned Cascade = getCascade(Reg); - if (!Cascade) - Cascade = NextCascade; - return Cascade; - } - - template<typename Iterator> - void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { - ExtraRegInfo.resize(MRI->getNumVirtRegs()); - for (;Begin != End; ++Begin) { - Register Reg = *Begin; - if (ExtraRegInfo[Reg].Stage == RS_New) - ExtraRegInfo[Reg].Stage = NewStage; - } - } - /// EvictionTrack - Keeps track of past evictions in order to optimize region /// split decision. class EvictionTrack { @@ -375,10 +300,6 @@ class RAGreedy : public MachineFunctionPass, /// Callee-save register cost, calculated once per machine function. BlockFrequency CSRCost; - /// Run or not the local reassignment heuristic. This information is - /// obtained from the TargetSubtargetInfo. - bool EnableLocalReassign; - /// Enable or not the consideration of the cost of local intervals created /// by a split candidate when choosing the best split candidate. 
bool EnableAdvancedRASplitCost; @@ -447,13 +368,6 @@ private: bool calcCompactRegion(GlobalSplitCandidate&); void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); void calcGapWeights(MCRegister, SmallVectorImpl<float> &); - Register canReassign(LiveInterval &VirtReg, Register PrevReg) const; - bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const; - bool canEvictInterferenceBasedOnCost(LiveInterval &, MCRegister, bool, - EvictionCost &, - const SmallVirtRegSet &) const; - bool canEvictHintInterference(LiveInterval &, MCRegister, - const SmallVirtRegSet &) const; bool canEvictInterferenceInRange(const LiveInterval &VirtReg, MCRegister PhysReg, SlotIndex Start, SlotIndex End, EvictionCost &MaxCost) const; @@ -529,8 +443,6 @@ private: BlockFrequency getBrokenHintFreq(const HintsInfo &, MCRegister); void collectHintInfo(Register, HintsInfo &); - bool isUnusedCalleeSavedReg(MCRegister PhysReg) const; - /// Greedy RA statistic to remark. struct RAGreedyStats { unsigned Reloads = 0; @@ -597,6 +509,7 @@ INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) INITIALIZE_PASS_DEPENDENCY(EdgeBundles) INITIALIZE_PASS_DEPENDENCY(SpillPlacement) INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) +INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysis) INITIALIZE_PASS_END(RAGreedy, "greedy", "Greedy Register Allocator", false, false) @@ -663,6 +576,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<EdgeBundles>(); AU.addRequired<SpillPlacement>(); AU.addRequired<MachineOptimizationRemarkEmitterPass>(); + AU.addRequired<RegAllocEvictionAdvisorAnalysis>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -696,22 +610,25 @@ void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) { } void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) { + ExtraInfo->LRE_DidCloneVirtReg(New, Old); +} + +void ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) { // Cloning a register we haven't even heard about yet? Just ignore it. - if (!ExtraRegInfo.inBounds(Old)) + if (!Info.inBounds(Old)) return; // LRE may clone a virtual register because dead code elimination causes it to // be split into connected components. The new components are much smaller // than the original, so they should get a new chance at being assigned. // same stage as the parent. 
- ExtraRegInfo[Old].Stage = RS_Assign; - ExtraRegInfo.grow(New); - ExtraRegInfo[New] = ExtraRegInfo[Old]; + Info[Old].Stage = RS_Assign; + Info.grow(New.id()); + Info[New] = Info[Old]; } void RAGreedy::releaseMemory() { SpillerInstance.reset(); - ExtraRegInfo.clear(); GlobalCand.clear(); } @@ -725,10 +642,10 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { assert(Reg.isVirtual() && "Can only enqueue virtual registers"); unsigned Prio; - auto Stage = getOrInitStage(Reg); + auto Stage = ExtraInfo->getOrInitStage(Reg); if (Stage == RS_New) { Stage = RS_Assign; - setStage(Reg, Stage); + ExtraInfo->setStage(Reg, Stage); } if (Stage == RS_Split) { // Unsplit ranges that couldn't be allocated immediately are deferred until @@ -824,7 +741,8 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg, MCRegister PhysHint = Hint.asMCReg(); LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n'); - if (canEvictHintInterference(VirtReg, PhysHint, FixedRegisters)) { + if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint, + FixedRegisters)) { evictInterference(VirtReg, PhysHint, NewVRegs); return PhysHint; } @@ -850,7 +768,8 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg, // Interference eviction //===----------------------------------------------------------------------===// -Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) const { +Register RegAllocEvictionAdvisor::canReassign(LiveInterval &VirtReg, + Register PrevReg) const { auto Order = AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); MCRegister PhysReg; @@ -889,9 +808,10 @@ Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) const { /// register. /// @param B The live range to be evicted. /// @param BreaksHint True when B is already assigned to its preferred register. -bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, - LiveInterval &B, bool BreaksHint) const { - bool CanSplit = getStage(B) < RS_Spill; +bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint, + LiveInterval &B, + bool BreaksHint) const { + bool CanSplit = ExtraInfo->getStage(B) < RS_Spill; // Be fairly aggressive about following hints as long as the evictee can be // split. @@ -907,7 +827,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, /// canEvictHintInterference - return true if the interference for VirtReg /// on the PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg. -bool RAGreedy::canEvictHintInterference( +bool DefaultEvictionAdvisor::canEvictHintInterference( LiveInterval &VirtReg, MCRegister PhysReg, const SmallVirtRegSet &FixedRegisters) const { EvictionCost MaxCost; @@ -925,7 +845,7 @@ bool RAGreedy::canEvictHintInterference( /// @param MaxCost Only look for cheaper candidates and update with new cost /// when returning true. /// @returns True when interference can be evicted cheaper than MaxCost. -bool RAGreedy::canEvictInterferenceBasedOnCost( +bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost( LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const { // It is only possible to evict virtual register interference. @@ -941,9 +861,7 @@ bool RAGreedy::canEvictInterferenceBasedOnCost( // // This works out so a register without a cascade number is allowed to evict // anything, and it can be evicted by anything. 
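// Illustrative sketch, not part of this patch: a toy model of the cascade
// rule described above, showing what getCascadeOrCurrentNext() feeds into.
// The urgent-eviction exception handled below is ignored; names are
// hypothetical.
#include <cassert>

namespace {
struct CascadeModel {
  unsigned NextCascade = 2; // cascade 1 has already been handed out
  // 0 means "no cascade assigned yet"; treat it as the next number to be
  // handed out, exactly like getCascadeOrCurrentNext().
  unsigned effective(unsigned Assigned) const {
    return Assigned ? Assigned : NextCascade;
  }
  // Non-urgent eviction is allowed only against strictly older cascades.
  bool mayEvict(unsigned Evictor, unsigned Intf) const {
    return effective(Evictor) > Intf;
  }
};
} // namespace

int main() {
  CascadeModel M;
  assert(M.mayEvict(0, 1));  // no cascade yet: acts as the newest, evicts anything
  assert(M.mayEvict(1, 0));  // no cascade on the interference: evictable by anything
  assert(!M.mayEvict(1, 2)); // an older cascade never evicts a newer one
  return 0;
}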
- unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade; - if (!Cascade) - Cascade = NextCascade; + unsigned Cascade = ExtraInfo->getCascadeOrCurrentNext(VirtReg.reg()); EvictionCost Cost; for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { @@ -965,7 +883,7 @@ bool RAGreedy::canEvictInterferenceBasedOnCost( return false; // Never evict spill products. They cannot split or spill. - if (getStage(*Intf) == RS_Done) + if (ExtraInfo->getStage(*Intf) == RS_Done) return false; // Once a live range becomes small enough, it is urgent that we find a // register for it. This is indicated by an infinite spill weight. These @@ -980,7 +898,7 @@ bool RAGreedy::canEvictInterferenceBasedOnCost( RegClassInfo.getNumAllocatableRegs( MRI->getRegClass(Intf->reg()))); // Only evict older cascades or live ranges without a cascade. - unsigned IntfCascade = ExtraRegInfo[Intf->reg()].Cascade; + unsigned IntfCascade = ExtraInfo->getCascade(Intf->reg()); if (Cascade <= IntfCascade) { if (!Urgent) return false; @@ -1043,7 +961,7 @@ bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg, if (!Register::isVirtualRegister(Intf->reg())) return false; // Never evict spill products. They cannot split or spill. - if (getStage(*Intf) == RS_Done) + if (ExtraInfo->getStage(*Intf) == RS_Done) return false; // Would this break a satisfied hint? @@ -1106,7 +1024,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg, // Make sure that VirtReg has a cascade number, and assign that cascade // number to every evicted register. These live ranges than then only be // evicted by a newer cascade, preventing infinite loops. - unsigned Cascade = getOrAssignNewCascade(VirtReg.reg()); + unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg()); LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI) << " interference: Cascade " << Cascade << '\n'); @@ -1132,10 +1050,10 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg, LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg()); Matrix->unassign(*Intf); - assert((getCascade(Intf->reg()) < Cascade || + assert((ExtraInfo->getCascade(Intf->reg()) < Cascade || VirtReg.isSpillable() < Intf->isSpillable()) && "Cannot decrease cascade number, illegal eviction"); - setCascade(Intf->reg(), Cascade); + ExtraInfo->setCascade(Intf->reg(), Cascade); ++NumEvicted; NewVRegs.push_back(Intf->reg()); } @@ -1143,7 +1061,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg, /// Returns true if the given \p PhysReg is a callee saved register and has not /// been used for allocation yet. -bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const { +bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const { MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg); if (!CSR) return false; @@ -1151,7 +1069,7 @@ bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const { return !Matrix->isPhysRegUsed(PhysReg); } -MCRegister RAGreedy::tryFindEvictionCandidate( +MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate( LiveInterval &VirtReg, const AllocationOrder &Order, uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const { // Keep track of the cheapest interference seen so far. 
@@ -1225,8 +1143,8 @@ MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription, TimePassesIsEnabled); - MCRegister BestPhys = - tryFindEvictionCandidate(VirtReg, Order, CostPerUseLimit, FixedRegisters); + MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate( + VirtReg, Order, CostPerUseLimit, FixedRegisters); if (BestPhys.isValid()) evictInterference(VirtReg, BestPhys, NewVRegs); return BestPhys; @@ -1769,8 +1687,8 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, // the ActiveBlocks list with each candidate. We need to filter out // duplicates. BitVector Todo = SA->getThroughBlocks(); - for (unsigned c = 0; c != UsedCands.size(); ++c) { - ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks; + for (unsigned UsedCand : UsedCands) { + ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks; for (unsigned Number : Blocks) { if (!Todo.test(Number)) continue; @@ -1817,13 +1735,13 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, const LiveInterval &Reg = LIS->getInterval(LREdit.get(I)); // Ignore old intervals from DCE. - if (getOrInitStage(Reg.reg()) != RS_New) + if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New) continue; // Remainder interval. Don't try splitting again, spill if it doesn't // allocate. if (IntvMap[I] == 0) { - setStage(Reg, RS_Spill); + ExtraInfo->setStage(Reg, RS_Spill); continue; } @@ -1834,7 +1752,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks << " blocks as original.\n"); // Don't allow repeated splitting as a safe guard against looping. - setStage(Reg, RS_Split2); + ExtraInfo->setStage(Reg, RS_Split2); } continue; } @@ -1899,7 +1817,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, unsigned BestCand = NoCand; for (MCPhysReg PhysReg : Order) { assert(PhysReg); - if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg)) + if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg)) continue; // Discard bad candidates before we run out of interference cache cursors. @@ -2065,8 +1983,8 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, // goes straight to spilling, the new local ranges get to stay RS_New. for (unsigned I = 0, E = LREdit.size(); I != E; ++I) { const LiveInterval &LI = LIS->getInterval(LREdit.get(I)); - if (getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0) - setStage(LI, RS_Spill); + if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0) + ExtraInfo->setStage(LI, RS_Spill); } if (VerifyEnabled) @@ -2152,7 +2070,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SE->finish(&IntvMap); DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS); // Assign all new registers to RS_Spill. This was the last chance. - setStage(LREdit.begin(), LREdit.end(), RS_Spill); + ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill); return 0; } @@ -2320,7 +2238,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // These rules allow a 3 -> 2+3 split once, which we need. They also prevent // excessive splitting and infinite loops. // - bool ProgressRequired = getStage(VirtReg) >= RS_Split2; + bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2; // Best split candidate. 
unsigned BestBefore = NumGaps; @@ -2456,7 +2374,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, assert(!ProgressRequired && "Didn't make progress when it was required."); for (unsigned I = 0, E = IntvMap.size(); I != E; ++I) if (IntvMap[I] == 1) { - setStage(LIS->getInterval(LREdit.get(I)), RS_Split2); + ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2); LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I))); } LLVM_DEBUG(dbgs() << '\n'); @@ -2477,7 +2395,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<Register> &NewVRegs, const SmallVirtRegSet &FixedRegisters) { // Ranges must be Split2 or less. - if (getStage(VirtReg) >= RS_Spill) + if (ExtraInfo->getStage(VirtReg) >= RS_Spill) return 0; // Local intervals are handled separately. @@ -2499,7 +2417,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, // First try to split around a region spanning multiple blocks. RS_Split2 // ranges already made dubious progress with region splitting, so they go // straight to single block splitting. - if (getStage(VirtReg) < RS_Split2) { + if (ExtraInfo->getStage(VirtReg) < RS_Split2) { MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); if (PhysReg || !NewVRegs.empty()) return PhysReg; @@ -2551,7 +2469,7 @@ bool RAGreedy::mayRecolorAllInterferences( // it would not be recolorable as it is in the same state as VirtReg. // However, if VirtReg has tied defs and Intf doesn't, then // there is still a point in examining if it can be recolorable. - if (((getStage(*Intf) == RS_Done && + if (((ExtraInfo->getStage(*Intf) == RS_Done && MRI->getRegClass(Intf->reg()) == CurRC) && !(hasTiedDef(MRI, VirtReg.reg()) && !hasTiedDef(MRI, Intf->reg()))) || @@ -2615,7 +2533,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); // Ranges must be Done. - assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) && + assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) && "Last chance recoloring should really be last chance"); // Set the max depth to LastChanceRecoloringMaxDepth. // We may want to reconsider that if we end up with a too large search space @@ -2806,7 +2724,7 @@ MCRegister RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg, uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) { - if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { + if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { // We choose spill over using the CSR for the first time if the spill cost // is lower than CSRCost. SA->analyze(&VirtReg); @@ -2818,7 +2736,7 @@ RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, CostPerUseLimit = 1; return 0; } - if (getStage(VirtReg) < RS_Split) { + if (ExtraInfo->getStage(VirtReg) < RS_Split) { // We choose pre-splitting over using the CSR for the first time if // the cost of splitting is lower than CSRCost. SA->analyze(&VirtReg); @@ -3051,8 +2969,8 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical // register. 
- if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) && - NewVRegs.empty()) { + if (CSRCost.getFrequency() && + EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) { MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, CostPerUseLimit, NewVRegs); if (CSRReg || !NewVRegs.empty()) @@ -3063,9 +2981,9 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, return PhysReg; } - LiveRangeStage Stage = getStage(VirtReg); + LiveRangeStage Stage = ExtraInfo->getStage(VirtReg); LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade " - << getCascade(VirtReg.reg()) << '\n'); + << ExtraInfo->getCascade(VirtReg.reg()) << '\n'); // Try to evict a less worthy live range, but only for ranges from the primary // queue. The RS_Split ranges already failed to do this, and they should not @@ -3094,7 +3012,7 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // Wait until the second time, when all smaller ranges have been allocated. // This gives a better picture of the interference to split around. if (Stage < RS_Split) { - setStage(VirtReg, RS_Split); + ExtraInfo->setStage(VirtReg, RS_Split); LLVM_DEBUG(dbgs() << "wait for second round\n"); NewVRegs.push_back(VirtReg.reg()); return 0; @@ -3120,12 +3038,12 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // Finally spill VirtReg itself. if ((EnableDeferredSpilling || TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) && - getStage(VirtReg) < RS_Memory) { + ExtraInfo->getStage(VirtReg) < RS_Memory) { // TODO: This is experimental and in particular, we do not model // the live range splitting done by spilling correctly. // We would need a deep integration with the spiller to do the // right thing here. Anyway, that is still good for early testing. - setStage(VirtReg, RS_Memory); + ExtraInfo->setStage(VirtReg, RS_Memory); LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n"); NewVRegs.push_back(VirtReg.reg()); } else { @@ -3133,7 +3051,7 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, TimerGroupDescription, TimePassesIsEnabled); LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); spiller().spill(LRE); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); + ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); // Tell LiveDebugVariables about the new ranges. Ranges not being covered by // the new regs are kept in LDV (still mapping to the old register), until @@ -3316,10 +3234,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { TII = MF->getSubtarget().getInstrInfo(); RCI.runOnMachineFunction(mf); - EnableLocalReassign = EnableLocalReassignment || - MF->getSubtarget().enableRALocalReassignment( - MF->getTarget().getOptLevel()); - EnableAdvancedRASplitCost = ConsiderLocalIntervalCost.getNumOccurrences() ? ConsiderLocalIntervalCost @@ -3354,8 +3268,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI)); - ExtraRegInfo.clear(); - NextCascade = 1; + ExtraInfo.emplace(); + EvictAdvisor = getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor( + *MF, Matrix, LIS, VRM, RegClassInfo, &*ExtraInfo); IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. 
SetOfBrokenHints.clear(); diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp index b22eb080791e..93be8f689d57 100644 --- a/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -623,8 +623,8 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, // Compute an initial allowed set for the current vreg. std::vector<MCRegister> VRegAllowed; ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF); - for (unsigned I = 0; I != RawPRegOrder.size(); ++I) { - MCRegister PReg(RawPRegOrder[I]); + for (MCPhysReg R : RawPRegOrder) { + MCRegister PReg(R); if (MRI.isReserved(PReg)) continue; diff --git a/llvm/lib/CodeGen/RegAllocScore.cpp b/llvm/lib/CodeGen/RegAllocScore.cpp new file mode 100644 index 000000000000..740890831617 --- /dev/null +++ b/llvm/lib/CodeGen/RegAllocScore.cpp @@ -0,0 +1,124 @@ +//===- RegAllocScore.cpp - evaluate regalloc policy quality ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// Calculate a measure of the register allocation policy quality. This is used +/// to construct a reward for the training of the ML-driven allocation policy. +/// Currently, the score is the sum of the machine basic block frequency-weighed +/// number of loads, stores, copies, and remat instructions, each factored with +/// a relative weight. +//===----------------------------------------------------------------------===// + +#include "RegAllocScore.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include <cassert> +#include <cstdint> +#include <numeric> +#include <vector> + +using namespace llvm; +cl::opt<double> CopyWeight("regalloc-copy-weight", cl::init(0.2), cl::Hidden); +cl::opt<double> LoadWeight("regalloc-load-weight", cl::init(4.0), cl::Hidden); +cl::opt<double> StoreWeight("regalloc-store-weight", cl::init(1.0), cl::Hidden); +cl::opt<double> CheapRematWeight("regalloc-cheap-remat-weight", cl::init(0.2), + cl::Hidden); +cl::opt<double> ExpensiveRematWeight("regalloc-expensive-remat-weight", + cl::init(1.0), cl::Hidden); +#define DEBUG_TYPE "regalloc-score" + +RegAllocScore &RegAllocScore::operator+=(const RegAllocScore &Other) { + CopyCounts += Other.copyCounts(); + LoadCounts += Other.loadCounts(); + StoreCounts += Other.storeCounts(); + LoadStoreCounts += Other.loadStoreCounts(); + CheapRematCounts += Other.cheapRematCounts(); + ExpensiveRematCounts += Other.expensiveRematCounts(); + return *this; +} + +bool RegAllocScore::operator==(const RegAllocScore &Other) const { + return copyCounts() == Other.copyCounts() && + loadCounts() == Other.loadCounts() && + storeCounts() == Other.storeCounts() && + loadStoreCounts() == Other.loadStoreCounts() && + cheapRematCounts() == Other.cheapRematCounts() && + expensiveRematCounts() == Other.expensiveRematCounts(); +} + +bool RegAllocScore::operator!=(const RegAllocScore &Other) const { + return !(*this == Other); +} + +double RegAllocScore::getScore() const { + double Ret = 0.0; + Ret += CopyWeight * 
copyCounts(); + Ret += LoadWeight * loadCounts(); + Ret += StoreWeight * storeCounts(); + Ret += (LoadWeight + StoreWeight) * loadStoreCounts(); + Ret += CheapRematWeight * cheapRematCounts(); + Ret += ExpensiveRematWeight * expensiveRematCounts(); + + return Ret; +} + +RegAllocScore +llvm::calculateRegAllocScore(const MachineFunction &MF, + const MachineBlockFrequencyInfo &MBFI, + AAResults &AAResults) { + return calculateRegAllocScore( + MF, + [&](const MachineBasicBlock &MBB) { + return MBFI.getBlockFreqRelativeToEntryBlock(&MBB); + }, + [&](const MachineInstr &MI) { + return MF.getSubtarget().getInstrInfo()->isTriviallyReMaterializable( + MI, &AAResults); + }); +} + +RegAllocScore llvm::calculateRegAllocScore( + const MachineFunction &MF, + llvm::function_ref<double(const MachineBasicBlock &)> GetBBFreq, + llvm::function_ref<bool(const MachineInstr &)> + IsTriviallyRematerializable) { + RegAllocScore Total; + + for (const MachineBasicBlock &MBB : MF) { + double BlockFreqRelativeToEntrypoint = GetBBFreq(MBB); + RegAllocScore MBBScore; + + for (const MachineInstr &MI : MBB) { + if (MI.isDebugInstr() || MI.isKill() || MI.isInlineAsm()) { + continue; + } + if (MI.isCopy()) { + MBBScore.onCopy(BlockFreqRelativeToEntrypoint); + } else if (IsTriviallyRematerializable(MI)) { + if (MI.getDesc().isAsCheapAsAMove()) { + MBBScore.onCheapRemat(BlockFreqRelativeToEntrypoint); + } else { + MBBScore.onExpensiveRemat(BlockFreqRelativeToEntrypoint); + } + } else if (MI.mayLoad() && MI.mayStore()) { + MBBScore.onLoadStore(BlockFreqRelativeToEntrypoint); + } else if (MI.mayLoad()) { + MBBScore.onLoad(BlockFreqRelativeToEntrypoint); + } else if (MI.mayStore()) { + MBBScore.onStore(BlockFreqRelativeToEntrypoint); + } + } + Total += MBBScore; + } + return Total; +} diff --git a/llvm/lib/CodeGen/RegAllocScore.h b/llvm/lib/CodeGen/RegAllocScore.h new file mode 100644 index 000000000000..3c28bb61189d --- /dev/null +++ b/llvm/lib/CodeGen/RegAllocScore.h @@ -0,0 +1,80 @@ +//==- RegAllocScore.h - evaluate regalloc policy quality ----------*-C++-*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// Calculate a measure of the register allocation policy quality. This is used +/// to construct a reward for the training of the ML-driven allocation policy. +/// Currently, the score is the sum of the machine basic block frequency-weighed +/// number of loads, stores, copies, and remat instructions, each factored with +/// a relative weight. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_REGALLOCSCORE_H_ +#define LLVM_CODEGEN_REGALLOCSCORE_H_ + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/Utils/TFUtils.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/Module.h" +#include <cassert> +#include <cstdint> +#include <limits> + +namespace llvm { + +/// Regalloc score. 
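A minimal worked example of how RegAllocScore::getScore() combines the frequency-weighted counts (an editorial sketch, not part of the patch; the weights mirror the regalloc-*-weight cl::opt defaults above and the counts are invented):

#include <cstdio>

int main() {
  // Default relative weights from the regalloc-*-weight options above.
  const double CopyW = 0.2, LoadW = 4.0, StoreW = 1.0;
  const double CheapRematW = 0.2, ExpensiveRematW = 1.0;

  // Hypothetical block-frequency-weighted counts gathered over a function.
  const double Copies = 3.0, Loads = 1.5, Stores = 2.0, LoadStores = 0.5;
  const double CheapRemats = 1.0, ExpensiveRemats = 0.25;

  const double Score = CopyW * Copies + LoadW * Loads + StoreW * Stores +
                       (LoadW + StoreW) * LoadStores +
                       CheapRematW * CheapRemats +
                       ExpensiveRematW * ExpensiveRemats;
  std::printf("score = %.2f\n", Score); // 11.55; lower is better per function
}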
+class RegAllocScore final { + double CopyCounts = 0.0; + double LoadCounts = 0.0; + double StoreCounts = 0.0; + double CheapRematCounts = 0.0; + double LoadStoreCounts = 0.0; + double ExpensiveRematCounts = 0.0; + +public: + RegAllocScore() = default; + RegAllocScore(const RegAllocScore &) = default; + + double copyCounts() const { return CopyCounts; } + double loadCounts() const { return LoadCounts; } + double storeCounts() const { return StoreCounts; } + double loadStoreCounts() const { return LoadStoreCounts; } + double expensiveRematCounts() const { return ExpensiveRematCounts; } + double cheapRematCounts() const { return CheapRematCounts; } + + void onCopy(double Freq) { CopyCounts += Freq; } + void onLoad(double Freq) { LoadCounts += Freq; } + void onStore(double Freq) { StoreCounts += Freq; } + void onLoadStore(double Freq) { LoadStoreCounts += Freq; } + void onExpensiveRemat(double Freq) { ExpensiveRematCounts += Freq; } + void onCheapRemat(double Freq) { CheapRematCounts += Freq; } + + RegAllocScore &operator+=(const RegAllocScore &Other); + bool operator==(const RegAllocScore &Other) const; + bool operator!=(const RegAllocScore &Other) const; + double getScore() const; +}; + +/// Calculate a score. When comparing 2 scores for the same function but +/// different policies, the better policy would have a smaller score. +/// The implementation is the overload below (which is also easily unittestable) +RegAllocScore calculateRegAllocScore(const MachineFunction &MF, + const MachineBlockFrequencyInfo &MBFI, + AAResults &AAResults); + +/// Implementation of the above, which is also more easily unittestable. +RegAllocScore calculateRegAllocScore( + const MachineFunction &MF, + llvm::function_ref<double(const MachineBasicBlock &)> GetBBFreq, + llvm::function_ref<bool(const MachineInstr &)> IsTriviallyRematerializable); +} // end namespace llvm + +#endif // LLVM_CODEGEN_REGALLOCSCORE_H_ diff --git a/llvm/lib/CodeGen/RegisterClassInfo.cpp b/llvm/lib/CodeGen/RegisterClassInfo.cpp index 797899fb5b86..65a65b9cae95 100644 --- a/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -109,8 +109,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { // FIXME: Once targets reserve registers instead of removing them from the // allocation order, we can simply use begin/end here. ArrayRef<MCPhysReg> RawOrder = RC->getRawAllocationOrder(*MF); - for (unsigned i = 0; i != RawOrder.size(); ++i) { - unsigned PhysReg = RawOrder[i]; + for (unsigned PhysReg : RawOrder) { // Remove reserved registers from the allocation order. if (Reserved.test(PhysReg)) continue; diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 4c8534cf2d01..a917b0d27d4a 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -4067,13 +4067,13 @@ void RegisterCoalescer::joinAllIntervals() { // Coalesce intervals in MBB priority order. unsigned CurrDepth = std::numeric_limits<unsigned>::max(); - for (unsigned i = 0, e = MBBs.size(); i != e; ++i) { + for (MBBPriorityInfo &MBB : MBBs) { // Try coalescing the collected local copies for deeper loops. 
- if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) { + if (JoinGlobalCopies && MBB.Depth < CurrDepth) { coalesceLocals(); - CurrDepth = MBBs[i].Depth; + CurrDepth = MBB.Depth; } - copyCoalesceInMBB(MBBs[i].MBB); + copyCoalesceInMBB(MBB.MBB); } lateLiveIntervalUpdate(); coalesceLocals(); diff --git a/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp b/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp index de6129a912d3..49859aeec78b 100644 --- a/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp +++ b/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp @@ -159,20 +159,17 @@ static bool reduceDbgValsBackwardScan(MachineBasicBlock &MBB) { SmallVector<MachineInstr *, 8> DbgValsToBeRemoved; SmallDenseSet<DebugVariable> VariableSet; - for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend(); - I != E; ++I) { - MachineInstr *MI = &*I; - - if (MI->isDebugValue()) { - DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(), - MI->getDebugLoc()->getInlinedAt()); + for (MachineInstr &MI : llvm::reverse(MBB)) { + if (MI.isDebugValue()) { + DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), + MI.getDebugLoc()->getInlinedAt()); auto R = VariableSet.insert(Var); // If it is a DBG_VALUE describing a constant as: // DBG_VALUE 0, ... // we just don't consider such instructions as candidates // for redundant removal. - if (MI->isNonListDebugValue()) { - MachineOperand &Loc = MI->getDebugOperand(0); + if (MI.isNonListDebugValue()) { + MachineOperand &Loc = MI.getDebugOperand(0); if (!Loc.isReg()) { // If we have already encountered this variable, just stop // tracking it. @@ -185,7 +182,7 @@ static bool reduceDbgValsBackwardScan(MachineBasicBlock &MBB) { // We have already encountered the value for this variable, // so this one can be deleted. if (!R.second) - DbgValsToBeRemoved.push_back(MI); + DbgValsToBeRemoved.push_back(&MI); continue; } diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp index 50d9d64bfcfd..3d8a7eecce18 100644 --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -521,8 +521,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( StackLayout SSL(StackAlignment); if (StackGuardSlot) { Type *Ty = StackGuardSlot->getAllocatedType(); - uint64_t Align = - std::max(DL.getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment()); + Align Align = std::max(DL.getPrefTypeAlign(Ty), StackGuardSlot->getAlign()); SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot), Align, SSC.getFullLiveRange()); } @@ -534,8 +533,9 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( Size = 1; // Don't create zero-sized stack objects. // Ensure the object is properly aligned. - uint64_t Align = - std::max(DL.getPrefTypeAlignment(Ty), Arg->getParamAlignment()); + Align Align = DL.getPrefTypeAlign(Ty); + if (auto A = Arg->getParamAlign()) + Align = std::max(Align, *A); SSL.addObject(Arg, Size, Align, SSC.getFullLiveRange()); } @@ -546,24 +546,24 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( Size = 1; // Don't create zero-sized stack objects. // Ensure the object is properly aligned. - uint64_t Align = std::max(DL.getPrefTypeAlignment(Ty), AI->getAlignment()); + Align Align = std::max(DL.getPrefTypeAlign(Ty), AI->getAlign()); SSL.addObject(AI, Size, Align, ClColoring ? 
SSC.getLiveRange(AI) : NoColoringRange); } SSL.computeLayout(); - uint64_t FrameAlignment = SSL.getFrameAlignment(); + Align FrameAlignment = SSL.getFrameAlignment(); // FIXME: tell SSL that we start at a less-then-MaxAlignment aligned location // (AlignmentSkew). if (FrameAlignment > StackAlignment) { // Re-align the base pointer according to the max requested alignment. - assert(isPowerOf2_64(FrameAlignment)); IRB.SetInsertPoint(BasePointer->getNextNode()); BasePointer = cast<Instruction>(IRB.CreateIntToPtr( - IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy), - ConstantInt::get(IntPtrTy, ~uint64_t(FrameAlignment - 1))), + IRB.CreateAnd( + IRB.CreatePtrToInt(BasePointer, IntPtrTy), + ConstantInt::get(IntPtrTy, ~(FrameAlignment.value() - 1))), StackPtrTy)); } diff --git a/llvm/lib/CodeGen/SafeStackLayout.cpp b/llvm/lib/CodeGen/SafeStackLayout.cpp index 7cdda7743c16..602afcfa9001 100644 --- a/llvm/lib/CodeGen/SafeStackLayout.cpp +++ b/llvm/lib/CodeGen/SafeStackLayout.cpp @@ -37,7 +37,7 @@ LLVM_DUMP_METHOD void StackLayout::print(raw_ostream &OS) { } } -void StackLayout::addObject(const Value *V, unsigned Size, uint64_t Alignment, +void StackLayout::addObject(const Value *V, unsigned Size, Align Alignment, const StackLifetime::LiveRange &Range) { StackObjects.push_back({V, Size, Alignment, Range}); ObjectAlignments[V] = Alignment; @@ -45,7 +45,7 @@ void StackLayout::addObject(const Value *V, unsigned Size, uint64_t Alignment, } static unsigned AdjustStackOffset(unsigned Offset, unsigned Size, - uint64_t Alignment) { + Align Alignment) { return alignTo(Offset + Size, Alignment) - Size; } @@ -62,7 +62,8 @@ void StackLayout::layoutObject(StackObject &Obj) { } LLVM_DEBUG(dbgs() << "Layout: size " << Obj.Size << ", align " - << Obj.Alignment << ", range " << Obj.Range << "\n"); + << Obj.Alignment.value() << ", range " << Obj.Range + << "\n"); assert(Obj.Alignment <= MaxAlignment); unsigned Start = AdjustStackOffset(0, Obj.Size, Obj.Alignment); unsigned End = Start + Obj.Size; diff --git a/llvm/lib/CodeGen/SafeStackLayout.h b/llvm/lib/CodeGen/SafeStackLayout.h index b72450e57080..4ac7af2059f5 100644 --- a/llvm/lib/CodeGen/SafeStackLayout.h +++ b/llvm/lib/CodeGen/SafeStackLayout.h @@ -22,7 +22,7 @@ namespace safestack { /// Compute the layout of an unsafe stack frame. class StackLayout { - uint64_t MaxAlignment; + Align MaxAlignment; struct StackRegion { unsigned Start; @@ -40,14 +40,14 @@ class StackLayout { struct StackObject { const Value *Handle; unsigned Size; - uint64_t Alignment; + Align Alignment; StackLifetime::LiveRange Range; }; SmallVector<StackObject, 8> StackObjects; DenseMap<const Value *, unsigned> ObjectOffsets; - DenseMap<const Value *, uint64_t> ObjectAlignments; + DenseMap<const Value *, Align> ObjectAlignments; void layoutObject(StackObject &Obj); @@ -56,7 +56,7 @@ public: /// Add an object to the stack frame. Value pointer is opaque and used as a /// handle to retrieve the object's offset in the frame later. - void addObject(const Value *V, unsigned Size, uint64_t Alignment, + void addObject(const Value *V, unsigned Size, Align Alignment, const StackLifetime::LiveRange &Range); /// Run the layout computation for all previously added objects. @@ -66,13 +66,13 @@ public: unsigned getObjectOffset(const Value *V) { return ObjectOffsets[V]; } /// Returns the alignment of the object - uint64_t getObjectAlignment(const Value *V) { return ObjectAlignments[V]; } + Align getObjectAlignment(const Value *V) { return ObjectAlignments[V]; } /// Returns the size of the entire frame. 
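A small aside on the power-of-two masking used by the Align-based re-alignment above (an editorial sketch, not part of the patch; the addresses are arbitrary): clearing the low bits with ~(Alignment - 1) rounds down, and alignTo-style rounding adds Alignment - 1 first to round up.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t Alignment = 32;  // must be a power of two
  const uint64_t Addr = 0x1005;   // arbitrary unaligned address
  const uint64_t Down = Addr & ~(Alignment - 1); // like ~(FrameAlignment.value() - 1)
  const uint64_t Up = (Addr + Alignment - 1) & ~(Alignment - 1); // like alignTo()
  assert(Down % Alignment == 0 && Up % Alignment == 0);
  std::printf("0x%llx -> down 0x%llx, up 0x%llx\n", (unsigned long long)Addr,
              (unsigned long long)Down, (unsigned long long)Up);
}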
unsigned getFrameSize() { return Regions.empty() ? 0 : Regions.back().End; } /// Returns the alignment of the frame. - uint64_t getFrameAlignment() { return MaxAlignment; } + Align getFrameAlignment() { return MaxAlignment; } void print(raw_ostream &OS); }; diff --git a/llvm/lib/CodeGen/ScheduleDAG.cpp b/llvm/lib/CodeGen/ScheduleDAG.cpp index ef3afab2b730..696b29018ae6 100644 --- a/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -618,8 +618,8 @@ std::vector<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit &StartSU, do { const SUnit *SU = WorkList.back(); WorkList.pop_back(); - for (int I = SU->Succs.size()-1; I >= 0; --I) { - const SUnit *Succ = SU->Succs[I].getSUnit(); + for (const SDep &SD : llvm::reverse(SU->Succs)) { + const SUnit *Succ = SD.getSUnit(); unsigned s = Succ->NodeNum; // Edges to non-SUnits are allowed but ignored (e.g. ExitSU). if (Succ->isBoundaryNode()) @@ -652,8 +652,8 @@ std::vector<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit &StartSU, do { const SUnit *SU = WorkList.back(); WorkList.pop_back(); - for (int I = SU->Preds.size()-1; I >= 0; --I) { - const SUnit *Pred = SU->Preds[I].getSUnit(); + for (const SDep &SD : llvm::reverse(SU->Preds)) { + const SUnit *Pred = SD.getSUnit(); unsigned s = Pred->NodeNum; // Edges to non-SUnits are allowed but ignored (e.g. EntrySU). if (Pred->isBoundaryNode()) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index df5a041b87cd..067ad819e0d2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -487,10 +487,7 @@ namespace { SDValue visitFCEIL(SDNode *N); SDValue visitFTRUNC(SDNode *N); SDValue visitFFLOOR(SDNode *N); - SDValue visitFMINNUM(SDNode *N); - SDValue visitFMAXNUM(SDNode *N); - SDValue visitFMINIMUM(SDNode *N); - SDValue visitFMAXIMUM(SDNode *N); + SDValue visitFMinMax(SDNode *N); SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); @@ -1701,10 +1698,10 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); case ISD::FFLOOR: return visitFFLOOR(N); - case ISD::FMINNUM: return visitFMINNUM(N); - case ISD::FMAXNUM: return visitFMAXNUM(N); - case ISD::FMINIMUM: return visitFMINIMUM(N); - case ISD::FMAXIMUM: return visitFMAXIMUM(N); + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINIMUM: + case ISD::FMAXIMUM: return visitFMinMax(N); case ISD::FCEIL: return visitFCEIL(N); case ISD::FTRUNC: return visitFTRUNC(N); case ISD::BRCOND: return visitBRCOND(N); @@ -2260,6 +2257,21 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { EVT VT = N0.getValueType(); SDLoc DL(N); + // fold (add x, undef) -> undef + if (N0.isUndef()) + return N0; + if (N1.isUndef()) + return N1; + + // fold (add c1, c2) -> c1+c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1})) + return C; + + // canonicalize constant to RHS + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::ADD, DL, VT, N1, N0); + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) @@ -2268,23 +2280,6 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { // fold (add x, 0) -> x, vector edition if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return N0; - if (ISD::isConstantSplatVectorAllZeros(N0.getNode())) - return N1; - } - - // fold (add x, undef) -> undef - if (N0.isUndef()) - return N0; 
- - if (N1.isUndef()) - return N1; - - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { - // canonicalize constant to RHS - if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::ADD, DL, VT, N1, N0); - // fold (add c1, c2) -> c1+c2 - return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}); } // fold (add x, 0) -> x @@ -2554,6 +2549,19 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) { EVT VT = N0.getValueType(); SDLoc DL(N); + // fold (add_sat x, undef) -> -1 + if (N0.isUndef() || N1.isUndef()) + return DAG.getAllOnesConstant(DL, VT); + + // fold (add_sat c1, c2) -> c3 + if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) + return C; + + // canonicalize constant to RHS + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(Opcode, DL, VT, N1, N0); + // fold vector ops if (VT.isVector()) { // TODO SimplifyVBinOp @@ -2561,20 +2569,6 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) { // fold (add_sat x, 0) -> x, vector edition if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return N0; - if (ISD::isConstantSplatVectorAllZeros(N0.getNode())) - return N1; - } - - // fold (add_sat x, undef) -> -1 - if (N0.isUndef() || N1.isUndef()) - return DAG.getAllOnesConstant(DL, VT); - - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { - // canonicalize constant to RHS - if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(Opcode, DL, VT, N1, N0); - // fold (add_sat c1, c2) -> c3 - return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}); } // fold (add_sat x, 0) -> x @@ -3260,6 +3254,15 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { EVT VT = N0.getValueType(); SDLoc DL(N); + // fold (sub x, x) -> 0 + // FIXME: Refactor this and xor and other similar operations together. + if (N0 == N1) + return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); + + // fold (sub c1, c2) -> c3 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1})) + return C; + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) @@ -3270,15 +3273,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return N0; } - // fold (sub x, x) -> 0 - // FIXME: Refactor this and xor and other similar operations together. 
- if (N0 == N1) - return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); - - // fold (sub c1, c2) -> c3 - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1})) - return C; - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -3611,15 +3605,6 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) { EVT VT = N0.getValueType(); SDLoc DL(N); - // fold vector ops - if (VT.isVector()) { - // TODO SimplifyVBinOp - - // fold (sub_sat x, 0) -> x, vector edition - if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) - return N0; - } - // fold (sub_sat x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, DL, VT); @@ -3632,6 +3617,15 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) { if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1})) return C; + // fold vector ops + if (VT.isVector()) { + // TODO SimplifyVBinOp + + // fold (sub_sat x, 0) -> x, vector edition + if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) + return N0; + } + // fold (sub_sat x, 0) -> x if (isNullConstant(N1)) return N0; @@ -3781,6 +3775,15 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); + // fold (mul c1, c2) -> c1*c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1})) + return C; + + // canonicalize constant to RHS (vector doesn't have to splat) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); + bool N1IsConst = false; bool N1IsOpaqueConst = false; APInt ConstValue1; @@ -3802,15 +3805,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } } - // fold (mul c1, c2) -> c1*c2 - if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1})) - return C; - - // canonicalize constant to RHS (vector doesn't have to splat) - if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); - // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1.isZero()) return N1; @@ -4140,17 +4134,17 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { EVT CCVT = getSetCCResultType(VT); SDLoc DL(N); + // fold (sdiv c1, c2) -> c1/c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1})) + return C; + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; - // fold (sdiv c1, c2) -> c1/c2 - ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1})) - return C; - // fold (sdiv X, -1) -> 0-X + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && N1C->isAllOnes()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); @@ -4284,17 +4278,17 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { EVT CCVT = getSetCCResultType(VT); SDLoc DL(N); + // fold (udiv c1, c2) -> c1/c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1})) + return C; + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; - // fold (udiv c1, c2) -> c1/c2 - ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1})) - return C; - // fold (udiv X, -1) -> select(X == -1, 1, 0) + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && N1C->isAllOnes()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), DAG.getConstant(1, DL, 
VT), @@ -4463,6 +4457,15 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + // fold (mulhs c1, c2) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1})) + return C; + + // canonicalize constant to RHS. + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0); + if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; @@ -4474,15 +4477,6 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { return DAG.getConstant(0, DL, VT); } - // fold (mulhs c1, c2) - if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1})) - return C; - - // canonicalize constant to RHS. - if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0); - // fold (mulhs x, 0) -> 0 if (isNullConstant(N1)) return N1; @@ -4523,6 +4517,15 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + // fold (mulhu c1, c2) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1})) + return C; + + // canonicalize constant to RHS. + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0); + if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; @@ -4534,15 +4537,6 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { return DAG.getConstant(0, DL, VT); } - // fold (mulhu c1, c2) - if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1})) - return C; - - // canonicalize constant to RHS. - if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0); - // fold (mulhu x, 0) -> 0 if (isNullConstant(N1)) return N1; @@ -4786,12 +4780,14 @@ SDValue DAGCombiner::visitMULO(SDNode *N) { } // Function to calculate whether the Min/Max pair of SDNodes (potentially -// swapped around) make a signed saturate pattern, clamping to between -2^(BW-1) -// and 2^(BW-1)-1. Returns the node being clamped and the bitwidth of the clamp -// in BW. Should work with both SMIN/SMAX nodes and setcc/select combo. The -// operands are the same as SimplifySelectCC. N0<N1 ? N2 : N3 +// swapped around) make a signed saturate pattern, clamping to between a signed +// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW. +// Returns the node being clamped and the bitwidth of the clamp in BW. Should +// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the +// same as SimplifySelectCC. N0<N1 ? N2 : N3. 
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, - SDValue N3, ISD::CondCode CC, unsigned &BW) { + SDValue N3, ISD::CondCode CC, unsigned &BW, + bool &Unsigned) { auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC) { // The compare and select operand should be the same or the select operands @@ -4858,17 +4854,27 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, const APInt &MinC = MinCOp->getAPIntValue(); const APInt &MaxC = MaxCOp->getAPIntValue(); APInt MinCPlus1 = MinC + 1; - if (-MaxC != MinCPlus1 || !MinCPlus1.isPowerOf2()) - return SDValue(); - BW = MinCPlus1.exactLogBase2() + 1; - return N02; + if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) { + BW = MinCPlus1.exactLogBase2() + 1; + Unsigned = false; + return N02; + } + + if (MaxC == 0 && MinCPlus1.isPowerOf2()) { + BW = MinCPlus1.exactLogBase2(); + Unsigned = true; + return N02; + } + + return SDValue(); } static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG) { unsigned BW; - SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW); + bool Unsigned; + SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned); if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT) return SDValue(); EVT FPVT = Fp.getOperand(0).getValueType(); @@ -4876,13 +4882,14 @@ static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, if (FPVT.isVector()) NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT, FPVT.getVectorElementCount()); - if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat( - ISD::FP_TO_SINT_SAT, Fp.getOperand(0).getValueType(), NewVT)) + unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT; + if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT)) return SDValue(); SDLoc DL(Fp); - SDValue Sat = DAG.getNode(ISD::FP_TO_SINT_SAT, DL, NewVT, Fp.getOperand(0), + SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0), DAG.getValueType(NewVT.getScalarType())); - return DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0)); + return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0)) + : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0)); } SDValue DAGCombiner::visitIMINMAX(SDNode *N) { @@ -4892,11 +4899,6 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { unsigned Opcode = N->getOpcode(); SDLoc DL(N); - // fold vector ops - if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) - return FoldedVOp; - // fold operation with constant operands. if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) return C; @@ -4904,7 +4906,12 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(N->getOpcode(), DL, VT, N1, N0); + return DAG.getNode(Opcode, DL, VT, N1, N0); + + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX. // Only do this if the current op isn't legal and the flipped is. 
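To make the new signed/unsigned saturate detection concrete (an editorial sketch, not part of the patch; detectSatBW is a hypothetical standalone rewrite of the constant checks in isSaturatingMinMax above): a clamp to [-128, 127] is recognized as a signed saturate with BW = 8, and a clamp to [0, 255] as an unsigned saturate with BW = 8.

#include <cstdint>
#include <cstdio>

// MinC is the constant on the "min" side of the clamp, MaxC the constant on
// the "max" side, mirroring the arithmetic in isSaturatingMinMax.
static bool detectSatBW(int64_t MinC, int64_t MaxC, unsigned &BW,
                        bool &Unsigned) {
  uint64_t MinCPlus1 = (uint64_t)MinC + 1;
  if (MinCPlus1 == 0 || (MinCPlus1 & (MinCPlus1 - 1)) != 0)
    return false; // MinC + 1 must be a power of two
  unsigned Log2 = 0;
  while ((1ULL << Log2) < MinCPlus1)
    ++Log2; // exact log2 of MinCPlus1
  if (-MaxC == (int64_t)MinCPlus1) { // clamp to [-2^(BW-1), 2^(BW-1)-1]
    BW = Log2 + 1;
    Unsigned = false;
    return true;
  }
  if (MaxC == 0) { // clamp to [0, 2^BW - 1]
    BW = Log2;
    Unsigned = true;
    return true;
  }
  return false;
}

int main() {
  unsigned BW;
  bool Unsigned;
  if (detectSatBW(127, -128, BW, Unsigned))
    std::printf("signed saturate, BW = %u\n", BW);   // BW = 8
  if (detectSatBW(255, 0, BW, Unsigned))
    std::printf("unsigned saturate, BW = %u\n", BW); // BW = 8
}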
@@ -5777,6 +5784,15 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N0 == N1) return N0; + // fold (and c1, c2) -> c1&c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1})) + return C; + + // canonicalize constant to RHS + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) @@ -5824,22 +5840,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } - // fold (and c1, c2) -> c1&c2 - ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1})) - return C; - - // canonicalize constant to RHS - if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); - // fold (and x, -1) -> x if (isAllOnesConstant(N1)) return N0; // if (and x, c) is known to be zero, return 0 unsigned BitWidth = VT.getScalarSizeInBits(); + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth))) return DAG.getConstant(0, SDLoc(N), VT); @@ -6546,21 +6553,25 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (N0 == N1) return N0; + // fold (or c1, c2) -> c1|c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1})) + return C; + + // canonicalize constant to RHS + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; // fold (or x, 0) -> x, vector edition - if (ISD::isConstantSplatVectorAllZeros(N0.getNode())) - return N1; if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return N0; // fold (or x, -1) -> -1, vector edition - if (ISD::isConstantSplatVectorAllOnes(N0.getNode())) - // do not return N0, because undef node may exist in N0 - return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType()); if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType()); @@ -6629,16 +6640,6 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } - // fold (or c1, c2) -> c1|c2 - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1})) - return C; - - // canonicalize constant to RHS - if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); - // fold (or x, 0) -> x if (isNullConstant(N1)) return N0; @@ -6651,6 +6652,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return NewSel; // fold (or x, c) -> c iff (x & ~c) == 0 + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) return N1; @@ -7941,18 +7943,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { EVT VT = N0.getValueType(); SDLoc DL(N); - // fold vector ops - if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) - return FoldedVOp; - - // fold (xor x, 0) -> x, vector edition - if (ISD::isConstantSplatVectorAllZeros(N0.getNode())) - return N1; - if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) - return N0; - } - // fold (xor undef, undef) -> 0. 
This is a common idiom (misuse). if (N0.isUndef() && N1.isUndef()) return DAG.getConstant(0, DL, VT); @@ -7969,9 +7959,19 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::XOR, DL, VT, N1, N0); + // fold vector ops + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; + + // fold (xor x, 0) -> x, vector edition + if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) + return N0; + } + // fold (xor x, 0) -> x if (isNullConstant(N1)) return N0; @@ -8409,6 +8409,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { EVT ShiftVT = N1.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); + // fold (shl c1, c2) -> c1<<c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1})) + return C; + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) @@ -8434,12 +8438,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } } - ConstantSDNode *N1C = isConstOrConstSplat(N1); - - // fold (shl c1, c2) -> c1<<c2 - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1})) - return C; - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -8558,6 +8556,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2 // TODO - support non-uniform vector shift amounts. + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) && N0->getFlags().hasExact()) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { @@ -8758,6 +8757,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); + // fold (sra c1, c2) -> (sra c1, c2) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1})) + return C; + // Arithmetic shifting an all-sign-bit value is a no-op. // fold (sra 0, x) -> 0 // fold (sra -1, x) -> -1 @@ -8769,17 +8772,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; - ConstantSDNode *N1C = isConstOrConstSplat(N1); - - // fold (sra c1, c2) -> (sra c1, c2) - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1})) - return C; - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports // sext_inreg. 
+ ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); @@ -8962,21 +8960,20 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); + // fold (srl c1, c2) -> c1 >>u c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1})) + return C; + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; - ConstantSDNode *N1C = isConstOrConstSplat(N1); - - // fold (srl c1, c2) -> c1 >>u c2 - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1})) - return C; - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // if (srl x, c) is known to be zero, return 0 + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits))) return DAG.getConstant(0, SDLoc(N), VT); @@ -10043,6 +10040,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); SDValue Mask = MST->getMask(); SDValue Chain = MST->getChain(); + SDValue Value = MST->getValue(); + SDValue Ptr = MST->getBasePtr(); SDLoc DL(N); // Zap masked stores with a zero mask. @@ -10054,12 +10053,50 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() && !MST->isCompressingStore() && !MST->isTruncatingStore()) return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(), - MST->getBasePtr(), MST->getMemOperand()); + MST->getBasePtr(), MST->getPointerInfo(), + MST->getOriginalAlign(), MachineMemOperand::MOStore, + MST->getAAInfo()); // Try transforming N to an indexed store. if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDValue(N, 0); + if (MST->isTruncatingStore() && MST->isUnindexed() && + Value.getValueType().isInteger() && + (!isa<ConstantSDNode>(Value) || + !cast<ConstantSDNode>(Value)->isOpaque())) { + APInt TruncDemandedBits = + APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), + MST->getMemoryVT().getScalarSizeInBits()); + + // See if we can simplify the operation with + // SimplifyDemandedBits, which only works if the value has a single use. + if (SimplifyDemandedBits(Value, TruncDemandedBits)) { + // Re-visit the store if anything changed and the store hasn't been merged + // with another node (N is deleted) SimplifyDemandedBits will add Value's + // node back to the worklist if necessary, but we also need to re-visit + // the Store node itself. + if (N->getOpcode() != ISD::DELETED_NODE) + AddToWorklist(N); + return SDValue(N, 0); + } + } + + // If this is a TRUNC followed by a masked store, fold this into a masked + // truncating store. We can do this even if this is already a masked + // truncstore. 
+ if ((Value.getOpcode() == ISD::TRUNCATE) && Value.getNode()->hasOneUse() && + MST->isUnindexed() && + TLI.canCombineTruncStore(Value.getOperand(0).getValueType(), + MST->getMemoryVT(), LegalOperations)) { + auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(), + Value.getOperand(0).getValueType()); + return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, + MST->getOffset(), Mask, MST->getMemoryVT(), + MST->getMemOperand(), MST->getAddressingMode(), + /*IsTruncating=*/true); + } + return SDValue(); } @@ -10109,8 +10146,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { // FIXME: Can we do this for indexed, expanding, or extending loads? if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() && !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) { - SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(), - MLD->getBasePtr(), MLD->getMemOperand()); + SDValue NewLd = DAG.getLoad( + N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(), + MLD->getPointerInfo(), MLD->getOriginalAlign(), + MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges()); return CombineTo(N, NewLd, NewLd.getValue(1)); } @@ -13876,19 +13915,19 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; - // fold vector ops - if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) - return FoldedVOp; - // fold (fadd c1, c2) -> c1 + c2 - if (N0CFP && N1CFP) - return DAG.getNode(ISD::FADD, DL, VT, N0, N1); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1})) + return C; // canonicalize constant to RHS if (N0CFP && !N1CFP) return DAG.getNode(ISD::FADD, DL, VT, N1, N0); + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; + // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math) ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true); if (N1C && N1C->isZero()) @@ -14084,15 +14123,15 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; + // fold (fsub c1, c2) -> c1-c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1})) + return C; + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; - // fold (fsub c1, c2) -> c1-c2 - if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, DL, VT, N0, N1); - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -14157,7 +14196,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true); ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true); EVT VT = N->getValueType(0); SDLoc DL(N); @@ -14168,22 +14206,20 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; - // fold vector ops - if (VT.isVector()) { - // This just handles C1 * C2 for vectors. Other vector folds are below. 
- if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) - return FoldedVOp; - } - // fold (fmul c1, c2) -> c1*c2 - if (N0CFP && N1CFP) - return DAG.getNode(ISD::FMUL, DL, VT, N0, N1); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1})) + return C; // canonicalize constant to RHS if (DAG.isConstantFPBuildVectorOrConstantFP(N0) && !DAG.isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMUL, DL, VT, N1, N0); + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; + if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -14495,8 +14531,6 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; @@ -14506,15 +14540,15 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; + // fold (fdiv c1, c2) -> c1/c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1})) + return C; + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; - // fold (fdiv c1, c2) -> c1/c2 - if (N0CFP && N1CFP) - return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1); - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -14523,7 +14557,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) { // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. - if (N1CFP) { + if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) { // Compute the reciprocal 1.0 / c2. 
const APFloat &N1APF = N1CFP->getValueAPF(); APFloat Recip(N1APF.getSemantics(), 1); // 1.0 @@ -14639,8 +14673,6 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue DAGCombiner::visitFREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); SDNodeFlags Flags = N->getFlags(); SelectionDAG::FlagInserter FlagsInserter(DAG, N); @@ -14649,9 +14681,9 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { return R; // fold (frem c1, c2) -> fmod(c1,c2) - if (N0CFP && N1CFP) - return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1); - + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1})) + return C; + if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -14712,12 +14744,12 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0); - bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); - if (N0CFP && N1CFP) // Constant fold - return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); + // fold (fcopysign c1, c2) -> fcopysign(c1,c2) + if (SDValue C = + DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1})) + return C; if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) { const APFloat &V = N1C->getValueAPF(); @@ -14835,14 +14867,6 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { - // This optimization is guarded by a function attribute because it may produce - // unexpected results. Ie, programs may be relying on the platform-specific - // undefined behavior when the float-to-int conversion overflows. - const Function &F = DAG.getMachineFunction().getFunction(); - Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow"); - if (StrictOverflow.getValueAsString().equals("false")) - return SDValue(); - // We only do this if the target has legal ftrunc. Otherwise, we'd likely be // replacing casts with a libcall. We also must be allowed to ignore -0.0 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer @@ -15216,31 +15240,26 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { return SDValue(); } -static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N, - APFloat (*Op)(const APFloat &, const APFloat &)) { +SDValue DAGCombiner::visitFMinMax(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); - const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); - const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); const SDNodeFlags Flags = N->getFlags(); unsigned Opc = N->getOpcode(); bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM; bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM; SelectionDAG::FlagInserter FlagsInserter(DAG, N); - if (N0CFP && N1CFP) { - const APFloat &C0 = N0CFP->getValueAPF(); - const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT); - } + // Constant fold. + if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1})) + return C; // Canonicalize to constant on RHS. 
if (DAG.isConstantFPBuildVectorOrConstantFP(N0) && !DAG.isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); - if (N1CFP) { + if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) { const APFloat &AF = N1CFP->getValueAPF(); // minnum(X, nan) -> X @@ -15272,22 +15291,6 @@ static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N, return SDValue(); } -SDValue DAGCombiner::visitFMINNUM(SDNode *N) { - return visitFMinMax(DAG, N, minnum); -} - -SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { - return visitFMinMax(DAG, N, maxnum); -} - -SDValue DAGCombiner::visitFMINIMUM(SDNode *N) { - return visitFMinMax(DAG, N, minimum); -} - -SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) { - return visitFMinMax(DAG, N, maximum); -} - SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -18392,8 +18395,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) { if (ST->isUnindexed() && ST->isSimple() && ST1->isUnindexed() && ST1->isSimple()) { - if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value && - ST->getMemoryVT() == ST1->getMemoryVT() && + if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr && + ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() && ST->getAddressSpace() == ST1->getAddressSpace()) { // If this is a store followed by a store with the same value to the // same location, then the store is dead/noop. @@ -20727,6 +20730,156 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { return NewLd; } +/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), +/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?), +/// EXTRACT_SUBVECTOR(Op?, ?), +/// Mask')) +/// iff it is legal and profitable to do so. Notably, the trimmed mask +/// (containing only the elements that are extracted) +/// must reference at most two subvectors. +static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, + SelectionDAG &DAG, + const TargetLowering &TLI, + bool LegalOperations) { + assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && + "Must only be called on EXTRACT_SUBVECTOR's"); + + SDValue N0 = N->getOperand(0); + + // Only deal with non-scalable vectors. + EVT NarrowVT = N->getValueType(0); + EVT WideVT = N0.getValueType(); + if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector()) + return SDValue(); + + // The operand must be a shufflevector. + auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0); + if (!WideShuffleVector) + return SDValue(); + + // The old shuffleneeds to go away. + if (!WideShuffleVector->hasOneUse()) + return SDValue(); + + // And the narrow shufflevector that we'll form must be legal. + if (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT)) + return SDValue(); + + uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1); + int NumEltsExtracted = NarrowVT.getVectorNumElements(); + assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 && + "Extract index is not a multiple of the output vector length."); + + int WideNumElts = WideVT.getVectorNumElements(); + + SmallVector<int, 16> NewMask; + NewMask.reserve(NumEltsExtracted); + SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2> + DemandedSubvectors; + + // Try to decode the wide mask into narrow mask from at most two subvectors. 
+ for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx, + NumEltsExtracted)) { + assert((M >= -1) && (M < (2 * WideNumElts)) && + "Out-of-bounds shuffle mask?"); + + if (M < 0) { + // Does not depend on operands, does not require adjustment. + NewMask.emplace_back(M); + continue; + } + + // From which operand of the shuffle does this shuffle mask element pick? + int WideShufOpIdx = M / WideNumElts; + // Which element of that operand is picked? + int OpEltIdx = M % WideNumElts; + + assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M && + "Shuffle mask vector decomposition failure."); + + // And which NumEltsExtracted-sized subvector of that operand is that? + int OpSubvecIdx = OpEltIdx / NumEltsExtracted; + // And which element within that subvector of that operand is that? + int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted; + + assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx && + "Shuffle mask subvector decomposition failure."); + + assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted + + WideShufOpIdx * WideNumElts) == M && + "Shuffle mask full decomposition failure."); + + SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx); + + if (Op.isUndef()) { + // Picking from an undef operand. Let's adjust mask instead. + NewMask.emplace_back(-1); + continue; + } + + // Profitability check: only deal with extractions from the first subvector. + if (OpSubvecIdx != 0) + return SDValue(); + + const std::pair<SDValue, int> DemandedSubvector = + std::make_pair(Op, OpSubvecIdx); + + if (DemandedSubvectors.insert(DemandedSubvector)) { + if (DemandedSubvectors.size() > 2) + return SDValue(); // We can't handle more than two subvectors. + // How many elements into the WideVT does this subvector start? + int Index = NumEltsExtracted * OpSubvecIdx; + // Bail out if the extraction isn't going to be cheap. + if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index)) + return SDValue(); + } + + // Ok, but from which operand of the new shuffle will this element pick? + int NewOpIdx = + getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector); + assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index."); + + int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted; + NewMask.emplace_back(AdjM); + } + assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask."); + assert(DemandedSubvectors.size() <= 2 && + "Should have ended up demanding at most two subvectors."); + + // Did we discover that the shuffle does not actually depend on operands? + if (DemandedSubvectors.empty()) + return DAG.getUNDEF(NarrowVT); + + // We still perform the exact same EXTRACT_SUBVECTOR, just on different + // operand[s]/index[es], so there is no point in checking for it's legality. + + // Do not turn a legal shuffle into an illegal one. + if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) && + !TLI.isShuffleMaskLegal(NewMask, NarrowVT)) + return SDValue(); + + SDLoc DL(N); + + SmallVector<SDValue, 2> NewOps; + for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/> + &DemandedSubvector : DemandedSubvectors) { + // How many elements into the WideVT does this subvector start? 
+ int Index = NumEltsExtracted * DemandedSubvector.second; + SDValue IndexC = DAG.getVectorIdxConstant(Index, DL); + NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, + DemandedSubvector.first, IndexC)); + } + assert((NewOps.size() == 1 || NewOps.size() == 2) && + "Should end up with either one or two ops"); + + // If we ended up with only one operand, pad with an undef. + if (NewOps.size() == 1) + NewOps.emplace_back(DAG.getUNDEF(NarrowVT)); + + return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask); +} + SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); @@ -20840,6 +20993,10 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { } } + if (SDValue V = + foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations)) + return V; + V = peekThroughBitcasts(V); // If the input is a build vector. Try to make a smaller build vector. @@ -22424,15 +22581,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - SDValue Ops[] = {LHS, RHS}; unsigned Opcode = N->getOpcode(); SDNodeFlags Flags = N->getFlags(); - // See if we can constant fold the vector operation. - if (SDValue Fold = DAG.FoldConstantArithmetic(Opcode, SDLoc(LHS), - LHS.getValueType(), Ops)) - return Fold; - // Move unary shuffles with identical masks after a vector binop: // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask)) // --> shuffle (VBinOp A, B), Undef, Mask diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 08598eeded7a..5dfb65ef131a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3367,13 +3367,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } case ISD::FSHL: case ISD::FSHR: - if (TLI.expandFunnelShift(Node, Tmp1, DAG)) - Results.push_back(Tmp1); + if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) + Results.push_back(Expanded); break; case ISD::ROTL: case ISD::ROTR: - if (TLI.expandROT(Node, true /*AllowVectorOps*/, Tmp1, DAG)) - Results.push_back(Tmp1); + if (SDValue Expanded = TLI.expandROT(Node, true /*AllowVectorOps*/, DAG)) + Results.push_back(Expanded); break; case ISD::SADDSAT: case ISD::UADDSAT: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 1fa4d88fcb4a..518e525e13d0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1277,8 +1277,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N, bool IsVP) { SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) { // Lower the rotate to shifts and ORs which can be promoted. 
- SDValue Res; - TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG); + SDValue Res = TLI.expandROT(N, true /*AllowVectorOps*/, DAG); ReplaceValueWith(SDValue(N, 0), Res); return SDValue(); } @@ -1286,7 +1285,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) { SDValue Hi = GetPromotedInteger(N->getOperand(0)); SDValue Lo = GetPromotedInteger(N->getOperand(1)); - SDValue Amount = GetPromotedInteger(N->getOperand(2)); + SDValue Amt = GetPromotedInteger(N->getOperand(2)); SDLoc DL(N); EVT OldVT = N->getOperand(0).getValueType(); @@ -1297,21 +1296,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) { unsigned NewBits = VT.getScalarSizeInBits(); // Amount has to be interpreted modulo the old bit width. - Amount = - DAG.getNode(ISD::UREM, DL, VT, Amount, DAG.getConstant(OldBits, DL, VT)); + Amt = DAG.getNode(ISD::UREM, DL, VT, Amt, DAG.getConstant(OldBits, DL, VT)); // If the promoted type is twice the size (or more), then we use the // traditional funnel 'double' shift codegen. This isn't necessary if the // shift amount is constant. // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw. // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)). - if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amount) && + if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amt) && !TLI.isOperationLegalOrCustom(Opcode, VT)) { SDValue HiShift = DAG.getConstant(OldBits, DL, VT); Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, HiShift); Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT); SDValue Res = DAG.getNode(ISD::OR, DL, VT, Hi, Lo); - Res = DAG.getNode(IsFSHR ? ISD::SRL : ISD::SHL, DL, VT, Res, Amount); + Res = DAG.getNode(IsFSHR ? ISD::SRL : ISD::SHL, DL, VT, Res, Amt); if (!IsFSHR) Res = DAG.getNode(ISD::SRL, DL, VT, Res, HiShift); return Res; @@ -1324,9 +1322,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) { // Increase Amount to shift the result into the lower bits of the promoted // type. if (IsFSHR) - Amount = DAG.getNode(ISD::ADD, DL, VT, Amount, ShiftOffset); + Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, ShiftOffset); - return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amount); + return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 98312f91d8c0..03dcd0f6d2c9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -83,7 +83,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { SDValue Res(&Node, i); bool Failed = false; // Don't create a value in map. - auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0; + auto ResId = ValueToIdMap.lookup(Res); unsigned Mapped = 0; if (ResId && (ReplacedValues.find(ResId) != ReplacedValues.end())) { @@ -301,7 +301,7 @@ ScanOperands: if (IgnoreNodeResults(N->getOperand(i).getNode())) continue; - const auto Op = N->getOperand(i); + const auto &Op = N->getOperand(i); LLVM_DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG)); EVT OpVT = Op.getValueType(); switch (getTypeAction(OpVT)) { @@ -1007,11 +1007,7 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { /// /// ValVT is the type of values that produced the boolean. 
SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) { - SDLoc dl(Bool); - EVT BoolVT = getSetCCResultType(ValVT); - ISD::NodeType ExtendCode = - TargetLowering::getExtendForContent(TLI.getBooleanContents(ValVT)); - return DAG.getNode(ExtendCode, dl, BoolVT, Bool); + return TLI.promoteTargetBoolean(DAG, Bool, ValVT); } /// Return the lower LoVT bits of Op in Lo and the upper HiVT bits in Hi. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 88a28a3be53e..1493f36fcd3e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -254,69 +254,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops); - if (Op.getOpcode() == ISD::LOAD) { - LoadSDNode *LD = cast<LoadSDNode>(Node); - ISD::LoadExtType ExtType = LD->getExtensionType(); - if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { - LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: "; - Node->dump(&DAG)); - switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0), - LD->getMemoryVT())) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: - return TranslateLegalizeResults(Op, Node); - case TargetLowering::Custom: { - SmallVector<SDValue, 2> ResultVals; - if (LowerOperationWrapper(Node, ResultVals)) { - if (ResultVals.empty()) - return TranslateLegalizeResults(Op, Node); - - Changed = true; - return RecursivelyLegalizeResults(Op, ResultVals); - } - LLVM_FALLTHROUGH; - } - case TargetLowering::Expand: { - Changed = true; - std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node); - AddLegalizedOperand(Op.getValue(0), Tmp.first); - AddLegalizedOperand(Op.getValue(1), Tmp.second); - return Op.getResNo() ? 
Tmp.first : Tmp.second; - } - } - } - } else if (Op.getOpcode() == ISD::STORE) { - StoreSDNode *ST = cast<StoreSDNode>(Node); - EVT StVT = ST->getMemoryVT(); - MVT ValVT = ST->getValue().getSimpleValueType(); - if (StVT.isVector() && ST->isTruncatingStore()) { - LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: "; - Node->dump(&DAG)); - switch (TLI.getTruncStoreAction(ValVT, StVT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: - return TranslateLegalizeResults(Op, Node); - case TargetLowering::Custom: { - SmallVector<SDValue, 1> ResultVals; - if (LowerOperationWrapper(Node, ResultVals)) { - if (ResultVals.empty()) - return TranslateLegalizeResults(Op, Node); - - Changed = true; - return RecursivelyLegalizeResults(Op, ResultVals); - } - LLVM_FALLTHROUGH; - } - case TargetLowering::Expand: { - Changed = true; - SDValue Chain = ExpandStore(Node); - AddLegalizedOperand(Op, Chain); - return Chain; - } - } - } - } - bool HasVectorValueOrOp = llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) || llvm::any_of(Node->op_values(), @@ -329,6 +266,22 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { switch (Op.getOpcode()) { default: return TranslateLegalizeResults(Op, Node); + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Node); + ISD::LoadExtType ExtType = LD->getExtensionType(); + EVT LoadedVT = LD->getMemoryVT(); + if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD) + Action = TLI.getLoadExtAction(ExtType, LD->getValueType(0), LoadedVT); + break; + } + case ISD::STORE: { + StoreSDNode *ST = cast<StoreSDNode>(Node); + EVT StVT = ST->getMemoryVT(); + MVT ValVT = ST->getValue().getSimpleValueType(); + if (StVT.isVector() && ST->isTruncatingStore()) + Action = TLI.getTruncStoreAction(ValVT, StVT); + break; + } case ISD::MERGE_VALUES: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); // This operation lies about being legal: when it claims to be legal, @@ -512,6 +465,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { switch (Action) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Promote: + assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) && + "This action is not supported yet!"); LLVM_DEBUG(dbgs() << "Promoting\n"); Promote(Node, ResultVals); assert(!ResultVals.empty() && "No results for promotion?"); @@ -731,8 +686,16 @@ SDValue VectorLegalizer::ExpandStore(SDNode *N) { } void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { - SDValue Tmp; switch (Node->getOpcode()) { + case ISD::LOAD: { + std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + return; + } + case ISD::STORE: + Results.push_back(ExpandStore(Node)); + return; case ISD::MERGE_VALUES: for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) Results.push_back(Node->getOperand(i)); @@ -804,15 +767,15 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { break; case ISD::FSHL: case ISD::FSHR: - if (TLI.expandFunnelShift(Node, Tmp, DAG)) { - Results.push_back(Tmp); + if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) { + Results.push_back(Expanded); return; } break; case ISD::ROTL: case ISD::ROTR: - if (TLI.expandROT(Node, false /*AllowVectorOps*/, Tmp, DAG)) { - Results.push_back(Tmp); + if (SDValue Expanded = TLI.expandROT(Node, false /*AllowVectorOps*/, DAG)) { + Results.push_back(Expanded); return; } break; diff --git 
a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 2695ed36991c..3d5c4c5b1cae 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -168,10 +168,9 @@ void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) { SUnits = &sunits; NumNodesSolelyBlocking.resize(SUnits->size(), 0); - for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { - SUnit *SU = &(*SUnits)[i]; - initNumRegDefsLeft(SU); - SU->NodeQueueId = 0; + for (SUnit &SU : *SUnits) { + initNumRegDefsLeft(&SU); + SU.NodeQueueId = 0; } } diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 84e6d2a16422..aec2cf38b400 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -442,33 +442,32 @@ void ScheduleDAGSDNodes::AddSchedEdges() { bool UnitLatencies = forceUnitLatencies(); // Pass 2: add the preds, succs, etc. - for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { - SUnit *SU = &SUnits[su]; - SDNode *MainNode = SU->getNode(); + for (SUnit &SU : SUnits) { + SDNode *MainNode = SU.getNode(); if (MainNode->isMachineOpcode()) { unsigned Opc = MainNode->getMachineOpcode(); const MCInstrDesc &MCID = TII->get(Opc); for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { - SU->isTwoAddress = true; + SU.isTwoAddress = true; break; } } if (MCID.isCommutable()) - SU->isCommutable = true; + SU.isCommutable = true; } // Find all predecessors and successors of the group. - for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { + for (SDNode *N = SU.getNode(); N; N = N->getGluedNode()) { if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).getImplicitDefs()) { - SU->hasPhysRegClobbers = true; + SU.hasPhysRegClobbers = true; unsigned NumUsed = InstrEmitter::CountResults(N); while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) --NumUsed; // Skip over unused values at the end. if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) - SU->hasPhysRegDefs = true; + SU.hasPhysRegDefs = true; } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { @@ -477,7 +476,8 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (isPassiveNode(OpN)) continue; // Not scheduled. SUnit *OpSU = &SUnits[OpN->getNodeId()]; assert(OpSU && "Node has no SUnit!"); - if (OpSU == SU) continue; // In the same group. + if (OpSU == &SU) + continue; // In the same group. EVT OpVT = N->getOperand(i).getValueType(); assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!"); @@ -508,10 +508,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() { Dep.setLatency(OpLatency); if (!isChain && !UnitLatencies) { computeOperandLatency(OpN, N, i, Dep); - ST.adjustSchedDependency(OpSU, DefIdx, SU, i, Dep); + ST.adjustSchedDependency(OpSU, DefIdx, &SU, i, Dep); } - if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { + if (!SU.addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { // Multiple register uses are combined in the same SUnit. For example, // we could have a set of glued nodes with all their defs consumed by // another set of glued nodes. 
Register pressure tracking sees this as @@ -721,10 +721,7 @@ void ScheduleDAGSDNodes::dumpSchedule() const { /// void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp); - unsigned Noops = 0; - for (unsigned i = 0, e = Sequence.size(); i != e; ++i) - if (!Sequence[i]) - ++Noops; + unsigned Noops = llvm::count(Sequence, nullptr); assert(Sequence.size() - Noops == ScheduledNodes && "The number of nodes scheduled doesn't match the expected number!"); } @@ -911,8 +908,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { } } - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - SUnit *SU = Sequence[i]; + for (SUnit *SU : Sequence) { if (!SU) { // Null SUnit* is a noop. TII->insertNoop(*Emitter.getBlock(), InsertPos); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 540a6e3efbe1..10940478010e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -169,11 +169,11 @@ void ScheduleDAGVLIW::listScheduleTopDown() { releaseSuccessors(&EntrySU); // All leaves to AvailableQueue. - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + for (SUnit &SU : SUnits) { // It is available if it has no predecessors. - if (SUnits[i].Preds.empty()) { - AvailableQueue->push(&SUnits[i]); - SUnits[i].isAvailable = true; + if (SU.Preds.empty()) { + AvailableQueue->push(&SU); + SU.isAvailable = true; } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c282e03387dd..2ae0d4df7b77 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2499,7 +2499,8 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, /// sense to specify which elements are demanded or undefined, therefore /// they are simply ignored. bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, - APInt &UndefElts, unsigned Depth) { + APInt &UndefElts, unsigned Depth) const { + unsigned Opcode = V.getOpcode(); EVT VT = V.getValueType(); assert(VT.isVector() && "Vector type expected"); @@ -2511,7 +2512,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, // Deal with some common cases here that work for both fixed and scalable // vector types. - switch (V.getOpcode()) { + switch (Opcode) { case ISD::SPLAT_VECTOR: UndefElts = V.getOperand(0).isUndef() ? APInt::getAllOnes(DemandedElts.getBitWidth()) @@ -2537,7 +2538,12 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: return isSplatValue(V.getOperand(0), DemandedElts, UndefElts, Depth + 1); - } + default: + if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || + Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) + return TLI->isSplatValueForTargetNode(V, DemandedElts, UndefElts, Depth); + break; +} // We don't support other cases than those above for scalable vectors at // the moment. 
@@ -2548,7 +2554,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch"); UndefElts = APInt::getZero(NumElts); - switch (V.getOpcode()) { + switch (Opcode) { case ISD::BUILD_VECTOR: { SDValue Scl; for (unsigned i = 0; i != NumElts; ++i) { @@ -2600,13 +2606,30 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, } break; } + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: { + // Widen the demanded elts by the src element count. + SDValue Src = V.getOperand(0); + // We don't support scalable vectors at the moment. + if (Src.getValueType().isScalableVector()) + return false; + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt UndefSrcElts; + APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts); + if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) { + UndefElts = UndefSrcElts.truncOrSelf(NumElts); + return true; + } + break; + } } return false; } /// Helper wrapper to main isSplatValue function. -bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) { +bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) const { EVT VT = V.getValueType(); assert(VT.isVector() && "Vector type expected"); @@ -5291,9 +5314,10 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (isUndef(Opcode, Ops)) return getUNDEF(VT); - // Handle the case of two scalars. + // Handle binops special cases. if (NumOps == 2) { - // TODO: Move foldConstantFPMath here? + if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops[0], Ops[1])) + return CFP; if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) { if (auto *C2 = dyn_cast<ConstantSDNode>(Ops[1])) { @@ -5463,10 +5487,11 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, // should. That will require dealing with a potentially non-default // rounding mode, checking the "opStatus" return value from the APFloat // math calculations, and possibly other variations. 
- auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode()); - auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode()); + ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, /*AllowUndefs*/ false); + ConstantFPSDNode *N2CFP = isConstOrConstSplatFP(N2, /*AllowUndefs*/ false); if (N1CFP && N2CFP) { - APFloat C1 = N1CFP->getValueAPF(), C2 = N2CFP->getValueAPF(); + APFloat C1 = N1CFP->getValueAPF(); // make copy + const APFloat &C2 = N2CFP->getValueAPF(); switch (Opcode) { case ISD::FADD: C1.add(C2, APFloat::rmNearestTiesToEven); @@ -5486,6 +5511,14 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, case ISD::FCOPYSIGN: C1.copySign(C2); return getConstantFP(C1, DL, VT); + case ISD::FMINNUM: + return getConstantFP(minnum(C1, C2), DL, VT); + case ISD::FMAXNUM: + return getConstantFP(maxnum(C1, C2), DL, VT); + case ISD::FMINIMUM: + return getConstantFP(minimum(C1, C2), DL, VT); + case ISD::FMAXIMUM: + return getConstantFP(maximum(C1, C2), DL, VT); default: break; } } @@ -5502,8 +5535,9 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, switch (Opcode) { case ISD::FSUB: // -0.0 - undef --> undef (consistent with "fneg undef") - if (N1CFP && N1CFP->getValueAPF().isNegZero() && N2.isUndef()) - return getUNDEF(VT); + if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, /*AllowUndefs*/ true)) + if (N1C && N1C->getValueAPF().isNegZero() && N2.isUndef()) + return getUNDEF(VT); LLVM_FALLTHROUGH; case ISD::FADD: @@ -5962,9 +5996,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2})) return SV; - if (SDValue V = foldConstantFPMath(Opcode, DL, VT, N1, N2)) - return V; - // Canonicalize an UNDEF to the RHS, even over a constant. if (N1.isUndef()) { if (TLI->isCommutativeBinOp(Opcode)) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 7726a0007e44..63cd723cf6da 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1036,7 +1036,6 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa, AA = aa; GFI = gfi; LibInfo = li; - DL = &DAG.getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout()); @@ -1626,6 +1625,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(C)) return getValue(Equiv->getGlobalValue()); + if (const auto *NC = dyn_cast<NoCFIValue>(C)) + return getValue(NC->getGlobalValue()); + VectorType *VecTy = cast<VectorType>(V->getType()); // Now that we know the number and type of the elements, get that number of @@ -1921,8 +1923,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { DAG.getDataLayout().getAllocaAddrSpace()), PtrValueVTs); - SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), - DemoteReg, PtrValueVTs[0]); + SDValue RetPtr = + DAG.getCopyFromReg(Chain, getCurSDLoc(), DemoteReg, PtrValueVTs[0]); SDValue RetOp = getValue(I.getOperand(0)); SmallVector<EVT, 4> ValueVTs, MemVTs; @@ -2657,7 +2659,8 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, SDLoc dl = getCurSDLoc(); SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); const Module &M = *ParentBB->getParent()->getFunction().getParent(); - Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext())); + Align Align = + 
DAG.getDataLayout().getPrefTypeAlign(Type::getInt8PtrTy(M.getContext())); // Generate code to load the content of the guard slot. SDValue GuardVal = DAG.getLoad( @@ -3058,14 +3061,14 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last) { // Update JTCases. - for (unsigned i = 0, e = SL->JTCases.size(); i != e; ++i) - if (SL->JTCases[i].first.HeaderBB == First) - SL->JTCases[i].first.HeaderBB = Last; + for (JumpTableBlock &JTB : SL->JTCases) + if (JTB.first.HeaderBB == First) + JTB.first.HeaderBB = Last; // Update BitTestCases. - for (unsigned i = 0, e = SL->BitTestCases.size(); i != e; ++i) - if (SL->BitTestCases[i].Parent == First) - SL->BitTestCases[i].Parent = Last; + for (BitTestBlock &BTB : SL->BitTestCases) + if (BTB.Parent == First) + BTB.Parent = Last; } void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { @@ -3111,6 +3114,8 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) { SDNodeFlags Flags; + if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) + Flags.copyFMF(*FPOp); SDValue Op = getValue(I.getOperand(0)); SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(), @@ -3881,7 +3886,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); if (Field) { // N = N + Offset - uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); + uint64_t Offset = + DAG.getDataLayout().getStructLayout(StTy)->getElementOffset(Field); // In an inbounds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. @@ -3898,7 +3904,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // (and fix up the result later). unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS); MVT IdxTy = MVT::getIntegerVT(IdxSize); - TypeSize ElementSize = DL->getTypeAllocSize(GTI.getIndexedType()); + TypeSize ElementSize = + DAG.getDataLayout().getTypeAllocSize(GTI.getIndexedType()); // We intentionally mask away the high bits here; ElementSize may not // fit in IdxTy. APInt ElementMul(IdxSize, ElementSize.getKnownMinSize()); @@ -4788,7 +4795,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } // Use TargetConstant instead of a regular constant for immarg. 
- EVT VT = TLI.getValueType(*DL, Arg->getType(), true); + EVT VT = TLI.getValueType(DAG.getDataLayout(), Arg->getType(), true); if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) { assert(CI->getBitWidth() <= 64 && "large intrinsic immediates not handled"); @@ -6571,7 +6578,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } else { EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); const Value *Global = TLI.getSDagStackGuard(M); - Align Align = DL->getPrefTypeAlign(Global->getType()); + Align Align = DAG.getDataLayout().getPrefTypeAlign(Global->getType()); Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global), MachinePointerInfo(Global, 0), Align, MachineMemOperand::MOVolatile); @@ -7127,12 +7134,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } SDValue VectorStep = DAG.getStepVector(sdl, VecTy); SDValue VectorInduction = DAG.getNode( - ISD::UADDO, sdl, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep); - SDValue SetCC = DAG.getSetCC(sdl, CCVT, VectorInduction.getValue(0), + ISD::UADDSAT, sdl, VecTy, VectorIndex, VectorStep); + SDValue SetCC = DAG.getSetCC(sdl, CCVT, VectorInduction, VectorTripCount, ISD::CondCode::SETULT); - setValue(&I, DAG.getNode(ISD::AND, sdl, CCVT, - DAG.getNOT(sdl, VectorInduction.getValue(1), CCVT), - SetCC)); + setValue(&I, SetCC); return; } case Intrinsic::experimental_vector_insert: { @@ -7317,32 +7322,26 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, SmallVector<SDValue, 7> &OpValues, - bool isGather) { + bool IsGather) { SDLoc DL = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); Value *PtrOperand = VPIntrin.getArgOperand(0); - MaybeAlign Alignment = DAG.getEVTAlign(VT); + MaybeAlign Alignment = VPIntrin.getPointerAlignment(); + if (!Alignment) + Alignment = DAG.getEVTAlign(VT); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); SDValue LD; bool AddToChain = true; - if (!isGather) { + if (!IsGather) { // Do not serialize variable-length loads of constant memory with // anything. - MemoryLocation ML; - if (VT.isScalableVector()) - ML = MemoryLocation::getAfter(PtrOperand); - else - ML = MemoryLocation( - PtrOperand, - LocationSize::precise( - DAG.getDataLayout().getTypeStoreSize(VPIntrin.getType())), - AAInfo); + MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); AddToChain = !AA || !AA->pointsToConstantMemory(ML); SDValue InChain = AddToChain ? 
DAG.getRoot() : DAG.getEntryNode(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, - VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges); + MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], MMO, false /*IsExpanding */); } else { @@ -7380,18 +7379,20 @@ void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin, SmallVector<SDValue, 7> &OpValues, - bool isScatter) { + bool IsScatter) { SDLoc DL = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); Value *PtrOperand = VPIntrin.getArgOperand(1); EVT VT = OpValues[0].getValueType(); - MaybeAlign Alignment = DAG.getEVTAlign(VT); + MaybeAlign Alignment = VPIntrin.getPointerAlignment(); + if (!Alignment) + Alignment = DAG.getEVTAlign(VT); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); SDValue ST; - if (!isScatter) { + if (!IsScatter) { MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, - VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo); + MemoryLocation::UnknownSize, *Alignment, AAInfo); ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], OpValues[1], OpValues[2], OpValues[3], MMO, false /* IsTruncating */); @@ -7690,8 +7691,9 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), PointerType::getUnqual(LoadTy)); - if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr( - const_cast<Constant *>(LoadInput), LoadTy, *Builder.DL)) + if (const Constant *LoadCst = + ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), + LoadTy, Builder.DAG.getDataLayout())) return Builder.getValue(LoadCst); } @@ -9646,8 +9648,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // We push in swifterror return as the last element of CLI.Ins. 
ArgListTy &Args = CLI.getArgs(); if (supportSwiftError()) { - for (unsigned i = 0, e = Args.size(); i != e; ++i) { - if (Args[i].IsSwiftError) { + for (const ArgListEntry &Arg : Args) { + if (Arg.IsSwiftError) { ISD::InputArg MyFlags; MyFlags.VT = getPointerTy(DL); MyFlags.ArgVT = EVT(getPointerTy(DL)); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index d6122aa0a739..ea48042a5dcf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -190,7 +190,6 @@ public: static const unsigned LowestSDNodeOrder = 1; SelectionDAG &DAG; - const DataLayout *DL = nullptr; AAResults *AA = nullptr; const TargetLibraryInfo *LibInfo; @@ -568,9 +567,9 @@ private: void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); void visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, - SmallVector<SDValue, 7> &OpValues, bool isGather); + SmallVector<SDValue, 7> &OpValues, bool IsGather); void visitVPStoreScatter(const VPIntrinsic &VPIntrin, - SmallVector<SDValue, 7> &OpValues, bool isScatter); + SmallVector<SDValue, 7> &OpValues, bool IsScatter); void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin); void visitVAStart(const CallInst &I); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index c7e37cf8ca14..77e11b364588 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -297,7 +297,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, #ifndef NDEBUG dbgs() << "If a target marks an instruction with " "'usesCustomInserter', it must implement " - "TargetLowering::EmitInstrWithCustomInserter!"; + "TargetLowering::EmitInstrWithCustomInserter!\n"; #endif llvm_unreachable(nullptr); } @@ -1784,27 +1784,25 @@ SelectionDAGISel::FinishBasicBlock() { } // Update PHI Nodes - for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); - pi != pe; ++pi) { - MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first); + for (const std::pair<MachineInstr *, unsigned> &P : + FuncInfo->PHINodesToUpdate) { + MachineInstrBuilder PHI(*MF, P.first); MachineBasicBlock *PHIBB = PHI->getParent(); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); // This is "default" BB. We have two jumps to it. From "header" BB and // from last "case" BB, unless the latter was skipped. if (PHIBB == BTB.Default) { - PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(BTB.Parent); + PHI.addReg(P.second).addMBB(BTB.Parent); if (!BTB.ContiguousRange) { - PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) - .addMBB(BTB.Cases.back().ThisBB); + PHI.addReg(P.second).addMBB(BTB.Cases.back().ThisBB); } } // One of "cases" BB. 
- for (unsigned j = 0, ej = BTB.Cases.size(); - j != ej; ++j) { - MachineBasicBlock* cBB = BTB.Cases[j].ThisBB; + for (const SwitchCG::BitTestCase &BT : BTB.Cases) { + MachineBasicBlock* cBB = BT.ThisBB; if (cBB->isSuccessor(PHIBB)) - PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB); + PHI.addReg(P.second).addMBB(cBB); } } } diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 737695b5eabe..e6b06ab93d6b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3136,6 +3136,19 @@ bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, return false; } +bool TargetLowering::isSplatValueForTargetNode(SDValue Op, + const APInt &DemandedElts, + APInt &UndefElts, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use isSplatValue if you don't know whether Op" + " is a target node!"); + return false; +} + // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must // work with truncating build vectors and vectors with elements of less than // 8 bits. @@ -4853,13 +4866,9 @@ TargetLowering::ParseConstraints(const DataLayout &DL, } // Now select chosen alternative in each constraint. - for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); - cIndex != eIndex; ++cIndex) { - AsmOperandInfo &cInfo = ConstraintOperands[cIndex]; - if (cInfo.Type == InlineAsm::isClobber) - continue; - cInfo.selectAlternative(bestMAIndex); - } + for (AsmOperandInfo &cInfo : ConstraintOperands) + if (cInfo.Type != InlineAsm::isClobber) + cInfo.selectAlternative(bestMAIndex); } } @@ -4927,9 +4936,9 @@ TargetLowering::ConstraintWeight ConstraintWeight BestWeight = CW_Invalid; // Loop over the options, keeping track of the most general one. - for (unsigned i = 0, e = rCodes->size(); i != e; ++i) { + for (const std::string &rCode : *rCodes) { ConstraintWeight weight = - getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str()); + getSingleConstraintMatchWeight(info, rCode.c_str()); if (weight > BestWeight) BestWeight = weight; } @@ -6550,15 +6559,15 @@ static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) { true); } -bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result, - SelectionDAG &DAG) const { +SDValue TargetLowering::expandFunnelShift(SDNode *Node, + SelectionDAG &DAG) const { EVT VT = Node->getValueType(0); if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) || !isOperationLegalOrCustom(ISD::SRL, VT) || !isOperationLegalOrCustom(ISD::SUB, VT) || !isOperationLegalOrCustomOrPromote(ISD::OR, VT))) - return false; + return SDValue(); SDValue X = Node->getOperand(0); SDValue Y = Node->getOperand(1); @@ -6592,8 +6601,7 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result, } Z = DAG.getNOT(DL, Z, ShVT); } - Result = DAG.getNode(RevOpcode, DL, VT, X, Y, Z); - return true; + return DAG.getNode(RevOpcode, DL, VT, X, Y, Z); } SDValue ShX, ShY; @@ -6633,13 +6641,12 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result, ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt); } } - Result = DAG.getNode(ISD::OR, DL, VT, ShX, ShY); - return true; + return DAG.getNode(ISD::OR, DL, VT, ShX, ShY); } // TODO: Merge with expandFunnelShift. 
-bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps, - SDValue &Result, SelectionDAG &DAG) const { +SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps, + SelectionDAG &DAG) const { EVT VT = Node->getValueType(0); unsigned EltSizeInBits = VT.getScalarSizeInBits(); bool IsLeft = Node->getOpcode() == ISD::ROTL; @@ -6650,12 +6657,12 @@ bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps, EVT ShVT = Op1.getValueType(); SDValue Zero = DAG.getConstant(0, DL, ShVT); - // If a rotate in the other direction is supported, use it. + // If a rotate in the other direction is more supported, use it. unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL; - if (isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) { + if (!isOperationLegalOrCustom(Node->getOpcode(), VT) && + isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) { SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1); - Result = DAG.getNode(RevRot, DL, VT, Op0, Sub); - return true; + return DAG.getNode(RevRot, DL, VT, Op0, Sub); } if (!AllowVectorOps && VT.isVector() && @@ -6664,7 +6671,7 @@ bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps, !isOperationLegalOrCustom(ISD::SUB, VT) || !isOperationLegalOrCustomOrPromote(ISD::OR, VT) || !isOperationLegalOrCustomOrPromote(ISD::AND, VT))) - return false; + return SDValue(); unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL; unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL; @@ -6690,8 +6697,7 @@ bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps, HsVal = DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt); } - Result = DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal); - return true; + return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal); } void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi, @@ -8048,7 +8054,8 @@ SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const { if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT)) return DAG.UnrollVectorOp(Node); - SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC); + EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC); return DAG.getSelect(DL, VT, Cond, Op0, Op1); } diff --git a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp index 86b559fd6413..43a54ce33bf0 100644 --- a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -162,8 +162,8 @@ Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) { // doInitialization creates the generic version of this type. std::vector<Type *> EltTys; EltTys.push_back(StackEntryTy); - for (size_t I = 0; I != Roots.size(); I++) - EltTys.push_back(Roots[I].second->getAllocatedType()); + for (const std::pair<CallInst *, AllocaInst *> &Root : Roots) + EltTys.push_back(Root.second->getAllocatedType()); return StructType::create(EltTys, ("gc_stackentry." 
+ F.getName()).str()); } @@ -240,8 +240,8 @@ void ShadowStackGCLowering::CollectRoots(Function &F) { SmallVector<std::pair<CallInst *, AllocaInst *>, 16> MetaRoots; for (BasicBlock &BB : F) - for (BasicBlock::iterator II = BB.begin(), E = BB.end(); II != E;) - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) + for (Instruction &I : BB) + if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(&I)) if (Function *F = CI->getCalledFunction()) if (F->getIntrinsicID() == Intrinsic::gcroot) { std::pair<CallInst *, AllocaInst *> Pair = std::make_pair( @@ -377,9 +377,9 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { // Delete the original allocas (which are no longer used) and the intrinsic // calls (which are no longer valid). Doing this last avoids invalidating // iterators. - for (unsigned I = 0, E = Roots.size(); I != E; ++I) { - Roots[I].first->eraseFromParent(); - Roots[I].second->eraseFromParent(); + for (std::pair<CallInst *, AllocaInst *> &Root : Roots) { + Root.first->eraseFromParent(); + Root.second->eraseFromParent(); } Roots.clear(); diff --git a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp index 5ccfacfc26dc..3640296adbca 100644 --- a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -131,15 +131,15 @@ bool StackMapLiveness::calculateLiveness(MachineFunction &MF) { bool HasStackMap = false; // Reverse iterate over all instructions and add the current live register // set to an instruction if we encounter a patchpoint instruction. - for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) { - if (I->getOpcode() == TargetOpcode::PATCHPOINT) { - addLiveOutSetToMI(MF, *I); + for (MachineInstr &MI : llvm::reverse(MBB)) { + if (MI.getOpcode() == TargetOpcode::PATCHPOINT) { + addLiveOutSetToMI(MF, MI); HasChanged = true; HasStackMap = true; ++NumStackMaps; } - LLVM_DEBUG(dbgs() << " " << LiveRegs << " " << *I); - LiveRegs.stepBackward(*I); + LLVM_DEBUG(dbgs() << " " << LiveRegs << " " << MI); + LiveRegs.stepBackward(MI); } ++NumBBsVisited; if (!HasStackMap) diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index 7445f77c955d..6765fd274686 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -162,7 +162,7 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge, } bool StackProtector::HasAddressTaken(const Instruction *AI, - uint64_t AllocSize) { + TypeSize AllocSize) { const DataLayout &DL = M->getDataLayout(); for (const User *U : AI->users()) { const auto *I = cast<Instruction>(U); @@ -170,7 +170,8 @@ bool StackProtector::HasAddressTaken(const Instruction *AI, // the bounds of the allocated object. Optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I); if (MemLoc.hasValue() && MemLoc->Size.hasValue() && - MemLoc->Size.getValue() > AllocSize) + !TypeSize::isKnownGE(AllocSize, + TypeSize::getFixed(MemLoc->Size.getValue()))) return true; switch (I->getOpcode()) { case Instruction::Store: @@ -203,13 +204,19 @@ bool StackProtector::HasAddressTaken(const Instruction *AI, // would use it could also be out-of-bounds meaning stack protection is // required. 
const GetElementPtrInst *GEP = cast<GetElementPtrInst>(I); - unsigned TypeSize = DL.getIndexTypeSizeInBits(I->getType()); - APInt Offset(TypeSize, 0); - APInt MaxOffset(TypeSize, AllocSize); - if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.ugt(MaxOffset)) + unsigned IndexSize = DL.getIndexTypeSizeInBits(I->getType()); + APInt Offset(IndexSize, 0); + if (!GEP->accumulateConstantOffset(DL, Offset)) + return true; + TypeSize OffsetSize = TypeSize::Fixed(Offset.getLimitedValue()); + if (!TypeSize::isKnownGT(AllocSize, OffsetSize)) return true; // Adjust AllocSize to be the space remaining after this offset. - if (HasAddressTaken(I, AllocSize - Offset.getLimitedValue())) + // We can't subtract a fixed size from a scalable one, so in that case + // assume the scalable value is of minimum size. + TypeSize NewAllocSize = + TypeSize::Fixed(AllocSize.getKnownMinValue()) - OffsetSize; + if (HasAddressTaken(I, NewAllocSize)) return true; break; } diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp index f49ba5ccd447..17e6f51d0899 100644 --- a/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -325,8 +325,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "Color spill slot intervals:\n"); bool Changed = false; - for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { - LiveInterval *li = SSIntervals[i]; + for (LiveInterval *li : SSIntervals) { int SS = Register::stackSlot2Index(li->reg()); int NewSS = ColorSlot(li); assert(NewSS >= 0 && "Stack coloring failed?"); @@ -338,8 +337,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { } LLVM_DEBUG(dbgs() << "\nSpill slots after coloring:\n"); - for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { - LiveInterval *li = SSIntervals[i]; + for (LiveInterval *li : SSIntervals) { int SS = Register::stackSlot2Index(li->reg()); li->setWeight(SlotWeights[SS]); } @@ -347,8 +345,8 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { llvm::stable_sort(SSIntervals, IntervalSorter()); #ifndef NDEBUG - for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) - LLVM_DEBUG(SSIntervals[i]->dump()); + for (LiveInterval *li : SSIntervals) + LLVM_DEBUG(li->dump()); LLVM_DEBUG(dbgs() << '\n'); #endif diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 54fc6ee45d00..68a7b80d6146 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -207,35 +207,34 @@ bool TailDuplicator::tailDuplicateAndUpdate( // Add the new vregs as available values. DenseMap<Register, AvailableValsTy>::iterator LI = SSAUpdateVals.find(VReg); - for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { - MachineBasicBlock *SrcBB = LI->second[j].first; - Register SrcReg = LI->second[j].second; + for (std::pair<MachineBasicBlock *, Register> &J : LI->second) { + MachineBasicBlock *SrcBB = J.first; + Register SrcReg = J.second; SSAUpdate.AddAvailableValue(SrcBB, SrcReg); } + SmallVector<MachineOperand *> DebugUses; // Rewrite uses that are outside of the original def's block. - MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); - // Only remove instructions after loop, as DBG_VALUE_LISTs with multiple - // uses of VReg may invalidate the use iterator when erased. 
- SmallPtrSet<MachineInstr *, 4> InstrsToRemove; - while (UI != MRI->use_end()) { - MachineOperand &UseMO = *UI; + for (MachineOperand &UseMO : + llvm::make_early_inc_range(MRI->use_operands(VReg))) { MachineInstr *UseMI = UseMO.getParent(); - ++UI; + // Rewrite debug uses last so that they can take advantage of any + // register mappings introduced by other users in its BB, since we + // cannot create new register definitions specifically for the debug + // instruction (as debug instructions should not affect CodeGen). if (UseMI->isDebugValue()) { - // SSAUpdate can replace the use with an undef. That creates - // a debug instruction that is a kill. - // FIXME: Should it SSAUpdate job to delete debug instructions - // instead of replacing the use with undef? - InstrsToRemove.insert(UseMI); + DebugUses.push_back(&UseMO); continue; } if (UseMI->getParent() == DefBB && !UseMI->isPHI()) continue; SSAUpdate.RewriteUse(UseMO); } - for (auto *MI : InstrsToRemove) - MI->eraseFromParent(); + for (auto *UseMO : DebugUses) { + MachineInstr *UseMI = UseMO->getParent(); + UseMO->setReg( + SSAUpdate.GetValueInMiddleOfBlock(UseMI->getParent(), true)); + } } SSAUpdateVRs.clear(); @@ -511,8 +510,8 @@ void TailDuplicator::updateSuccessorsPHIs( SSAUpdateVals.find(Reg); if (LI != SSAUpdateVals.end()) { // This register is defined in the tail block. - for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { - MachineBasicBlock *SrcBB = LI->second[j].first; + for (const std::pair<MachineBasicBlock *, Register> &J : LI->second) { + MachineBasicBlock *SrcBB = J.first; // If we didn't duplicate a bb into a particular predecessor, we // might still have added an entry to SSAUpdateVals to correcly // recompute SSA. If that case, avoid adding a dummy extra argument @@ -520,7 +519,7 @@ void TailDuplicator::updateSuccessorsPHIs( if (!SrcBB->isSuccessor(SuccBB)) continue; - Register SrcReg = LI->second[j].second; + Register SrcReg = J.second; if (Idx != 0) { MI.getOperand(Idx).setReg(SrcReg); MI.getOperand(Idx + 1).setMBB(SrcBB); @@ -531,8 +530,7 @@ void TailDuplicator::updateSuccessorsPHIs( } } else { // Live in tail block, must also be live in predecessors. - for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { - MachineBasicBlock *SrcBB = TDBBs[j]; + for (MachineBasicBlock *SrcBB : TDBBs) { if (Idx != 0) { MI.getOperand(Idx).setReg(Reg); MI.getOperand(Idx + 1).setMBB(SrcBB); diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 5119dac36713..3f22cc4289f2 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -436,7 +436,7 @@ MachineInstr &TargetInstrInfo::duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const { assert(!Orig.isNotDuplicable() && "Instruction cannot be duplicated"); MachineFunction &MF = *MBB.getParent(); - return MF.CloneMachineInstrBundle(MBB, InsertBefore, Orig); + return MF.cloneMachineInstrBundle(MBB, InsertBefore, Orig); } // If the COPY instruction in MI can be folded to a stack operation, return @@ -1418,3 +1418,16 @@ void TargetInstrInfo::mergeOutliningCandidateAttributes( })) F.addFnAttr(Attribute::NoUnwind); } + +bool TargetInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, + unsigned &Flags) const { + // Some instrumentations create special TargetOpcode at the start which + // expands to special code sequences which must be present. 
+ auto First = MBB.getFirstNonDebugInstr(); + if (First != MBB.end() && + (First->getOpcode() == TargetOpcode::FENTRY_CALL || + First->getOpcode() == TargetOpcode::PATCHABLE_FUNCTION_ENTER)) + return false; + + return true; +} diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index c0a7efff9e98..6fc6881f8736 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1187,7 +1187,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, // all stack slots), but we need to handle the different type of stackmap // operands and memory effects here. - if (!llvm::any_of(MI->operands(), + if (llvm::none_of(MI->operands(), [](MachineOperand &Operand) { return Operand.isFI(); })) return MBB; diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp index f4bb71535f7f..f5cb518fce3e 100644 --- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -248,8 +248,8 @@ static void getAllocatableSetForRC(const MachineFunction &MF, const TargetRegisterClass *RC, BitVector &R){ assert(RC->isAllocatable() && "invalid for nonallocatable sets"); ArrayRef<MCPhysReg> Order = RC->getRawAllocationOrder(MF); - for (unsigned i = 0; i != Order.size(); ++i) - R.set(Order[i]); + for (MCPhysReg PR : Order) + R.set(PR); } BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, diff --git a/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/llvm/lib/CodeGen/UnreachableBlockElim.cpp index c9a19948ff2f..3426a03b6083 100644 --- a/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -144,23 +144,22 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { } // Actually remove the blocks now. - for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) { + for (MachineBasicBlock *BB : DeadBlocks) { // Remove any call site information for calls in the block. - for (auto &I : DeadBlocks[i]->instrs()) + for (auto &I : BB->instrs()) if (I.shouldUpdateCallSiteInfo()) - DeadBlocks[i]->getParent()->eraseCallSiteInfo(&I); + BB->getParent()->eraseCallSiteInfo(&I); - DeadBlocks[i]->eraseFromParent(); + BB->eraseFromParent(); } // Cleanup PHI nodes. - for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { - MachineBasicBlock *BB = &*I; + for (MachineBasicBlock &BB : F) { // Prune unneeded PHI entries. - SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(), - BB->pred_end()); - MachineBasicBlock::iterator phi = BB->begin(); - while (phi != BB->end() && phi->isPHI()) { + SmallPtrSet<MachineBasicBlock*, 8> preds(BB.pred_begin(), + BB.pred_end()); + MachineBasicBlock::iterator phi = BB.begin(); + while (phi != BB.end() && phi->isPHI()) { for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2) if (!preds.count(phi->getOperand(i).getMBB())) { phi->RemoveOperand(i); @@ -189,7 +188,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { // insert a COPY instead of simply replacing the output // with the input. 
const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo(); - BuildMI(*BB, BB->getFirstNonPHI(), phi->getDebugLoc(), + BuildMI(BB, BB.getFirstNonPHI(), phi->getDebugLoc(), TII->get(TargetOpcode::COPY), OutputReg) .addReg(InputReg, getRegState(Input), InputSub); } diff --git a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp new file mode 100644 index 000000000000..cbc5d9ec169b --- /dev/null +++ b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp @@ -0,0 +1,1009 @@ +//===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// MachineScheduler schedules machine instructions after phi elimination. It +// preserves LiveIntervals so it can be invoked before register allocation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/VLIWMachineScheduler.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <iomanip> +#include <limits> +#include <memory> +#include <sstream> + +using namespace llvm; + +#define DEBUG_TYPE "machine-scheduler" + +static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden, + cl::ZeroOrMore, cl::init(false)); + +static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden, + cl::ZeroOrMore, cl::init(true)); + +static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level", + cl::Hidden, cl::ZeroOrMore, + cl::init(1)); + +// Check if the scheduler should penalize instructions that are available to +// early due to a zero-latency dependence. +static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden, + cl::ZeroOrMore, cl::init(true)); + +// This value is used to determine if a register class is a high pressure set. +// We compute the maximum number of registers needed and divided by the total +// available. Then, we compare the result to this value. +static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden, + cl::init(0.75f), + cl::desc("High register pressure threhold.")); + +VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI, + const TargetSchedModel *SM) + : TII(STI.getInstrInfo()), SchedModel(SM) { + ResourcesModel = createPacketizer(STI); + + // This hard requirement could be relaxed, + // but for now do not let it proceed. 
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); + + Packet.reserve(SchedModel->getIssueWidth()); + Packet.clear(); + ResourcesModel->clearResources(); +} + +void VLIWResourceModel::reset() { + Packet.clear(); + ResourcesModel->clearResources(); +} + +VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; } + +/// Return true if there is a dependence between SUd and SUu. +bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) { + if (SUd->Succs.size() == 0) + return false; + + for (const auto &S : SUd->Succs) { + // Since we do not add pseudos to packets, might as well + // ignore order dependencies. + if (S.isCtrl()) + continue; + + if (S.getSUnit() == SUu && S.getLatency() > 0) + return true; + } + return false; +} + +/// Check if scheduling of this SU is possible +/// in the current packet. +/// It is _not_ precise (statefull), it is more like +/// another heuristic. Many corner cases are figured +/// empirically. +bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) { + if (!SU || !SU->getInstr()) + return false; + + // First see if the pipeline could receive this instruction + // in the current cycle. + switch (SU->getInstr()->getOpcode()) { + default: + if (!ResourcesModel->canReserveResources(*SU->getInstr())) + return false; + break; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + case TargetOpcode::INLINEASM_BR: + break; + } + + // Now see if there are no other dependencies to instructions already + // in the packet. + if (IsTop) { + for (unsigned i = 0, e = Packet.size(); i != e; ++i) + if (hasDependence(Packet[i], SU)) + return false; + } else { + for (unsigned i = 0, e = Packet.size(); i != e; ++i) + if (hasDependence(SU, Packet[i])) + return false; + } + return true; +} + +/// Keep track of available resources. +bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) { + bool startNewCycle = false; + // Artificially reset state. + if (!SU) { + reset(); + TotalPackets++; + return false; + } + // If this SU does not fit in the packet or the packet is now full + // start a new one. 
+ if (!isResourceAvailable(SU, IsTop) || + Packet.size() >= SchedModel->getIssueWidth()) { + reset(); + TotalPackets++; + startNewCycle = true; + } + + switch (SU->getInstr()->getOpcode()) { + default: + ResourcesModel->reserveResources(*SU->getInstr()); + break; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::CFI_INSTRUCTION: + case TargetOpcode::EH_LABEL: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + case TargetOpcode::INLINEASM_BR: + break; + } + Packet.push_back(SU); + +#ifndef NDEBUG + LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n"); + for (unsigned i = 0, e = Packet.size(); i != e; ++i) { + LLVM_DEBUG(dbgs() << "\t[" << i << "] SU("); + LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t"); + LLVM_DEBUG(Packet[i]->getInstr()->dump()); + } +#endif + + return startNewCycle; +} + +DFAPacketizer * +VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const { + return STI.getInstrInfo()->CreateTargetScheduleState(STI); +} + +/// schedule - Called back from MachineScheduler::runOnMachineFunction +/// after setting up the current scheduling region. [RegionBegin, RegionEnd) +/// only includes instructions that have DAG nodes, not scheduling boundaries. +void VLIWMachineScheduler::schedule() { + LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW " + << printMBBReference(*BB) << " " << BB->getName() + << " in_func " << BB->getParent()->getName() + << " at loop depth " << MLI->getLoopDepth(BB) << " \n"); + + buildDAGWithRegPressure(); + + Topo.InitDAGTopologicalSorting(); + + // Postprocess the DAG to add platform-specific artificial dependencies. + postprocessDAG(); + + SmallVector<SUnit *, 8> TopRoots, BotRoots; + findRootsAndBiasEdges(TopRoots, BotRoots); + + // Initialize the strategy before modifying the DAG. + SchedImpl->initialize(this); + + LLVM_DEBUG({ + unsigned maxH = 0; + for (const SUnit &SU : SUnits) + if (SU.getHeight() > maxH) + maxH = SU.getHeight(); + dbgs() << "Max Height " << maxH << "\n"; + }); + LLVM_DEBUG({ + unsigned maxD = 0; + for (const SUnit &SU : SUnits) + if (SU.getDepth() > maxD) + maxD = SU.getDepth(); + dbgs() << "Max Depth " << maxD << "\n"; + }); + LLVM_DEBUG(dump()); + if (ViewMISchedDAGs) + viewGraph(); + + initQueues(TopRoots, BotRoots); + + bool IsTopNode = false; + while (true) { + LLVM_DEBUG( + dbgs() << "** VLIWMachineScheduler::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) + break; + + if (!checkSchedLimit()) + break; + + scheduleMI(SU, IsTopNode); + + // Notify the scheduling strategy after updating the DAG. + SchedImpl->schedNode(SU, IsTopNode); + + updateQueues(SU, IsTopNode); + } + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); + + placeDebugValues(); + + LLVM_DEBUG({ + dbgs() << "*** Final schedule for " + << printMBBReference(*begin()->getParent()) << " ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); +} + +void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { + DAG = static_cast<VLIWMachineScheduler *>(dag); + SchedModel = DAG->getSchedModel(); + + Top.init(DAG, SchedModel); + Bot.init(DAG, SchedModel); + + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or + // are disabled, then these HazardRecs will be disabled. 
+ const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries(); + const TargetSubtargetInfo &STI = DAG->MF.getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + delete Top.HazardRec; + delete Bot.HazardRec; + Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); + Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); + + delete Top.ResourceModel; + delete Bot.ResourceModel; + Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel()); + Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel()); + + const std::vector<unsigned> &MaxPressure = + DAG->getRegPressure().MaxSetPressure; + HighPressureSets.assign(MaxPressure.size(), 0); + for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) { + unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i); + HighPressureSets[i] = + ((float)MaxPressure[i] > ((float)Limit * RPThreshold)); + } + + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); +} + +VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel( + const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const { + return new VLIWResourceModel(STI, SchedModel); +} + +void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { + for (const SDep &PI : SU->Preds) { + unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle; + unsigned MinLatency = PI.getLatency(); +#ifndef NDEBUG + Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); +#endif + if (SU->TopReadyCycle < PredReadyCycle + MinLatency) + SU->TopReadyCycle = PredReadyCycle + MinLatency; + } + + if (!SU->isScheduled) + Top.releaseNode(SU, SU->TopReadyCycle); +} + +void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { + assert(SU->getInstr() && "Scheduled SUnit must have instr"); + + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; + ++I) { + unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; + unsigned MinLatency = I->getLatency(); +#ifndef NDEBUG + Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); +#endif + if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) + SU->BotReadyCycle = SuccReadyCycle + MinLatency; + } + + if (!SU->isScheduled) + Bot.releaseNode(SU, SU->BotReadyCycle); +} + +ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() { + delete ResourceModel; + delete HazardRec; +} + +/// Does this SU have a hazard within the current instruction group. +/// +/// The scheduler supports two modes of hazard recognition. The first is the +/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that +/// supports highly complicated in-order reservation tables +/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. +/// +/// The second is a streamlined mechanism that checks for hazards based on +/// simple counters that the scheduler itself maintains. It explicitly checks +/// for instruction dispatch limitations, including the number of micro-ops that +/// can dispatch per cycle. +/// +/// TODO: Also check whether the SU must start a new group. 
+bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) { + if (HazardRec->isEnabled()) + return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; + + unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); + if (IssueCount + uops > SchedModel->getIssueWidth()) + return true; + + return false; +} + +void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode( + SUnit *SU, unsigned ReadyCycle) { + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + // Check for interlocks first. For the purpose of other heuristics, an + // instruction that cannot issue appears as if it's not in the ReadyQueue. + if (ReadyCycle > CurrCycle || checkHazard(SU)) + + Pending.push(SU); + else + Available.push(SU); +} + +/// Move the boundary of scheduled code by one cycle. +void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { + unsigned Width = SchedModel->getIssueWidth(); + IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; + + assert(MinReadyCycle < std::numeric_limits<unsigned>::max() && + "MinReadyCycle uninitialized"); + unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); + + if (!HazardRec->isEnabled()) { + // Bypass HazardRec virtual calls. + CurrCycle = NextCycle; + } else { + // Bypass getHazardType calls in case of long latency. + for (; CurrCycle != NextCycle; ++CurrCycle) { + if (isTop()) + HazardRec->AdvanceCycle(); + else + HazardRec->RecedeCycle(); + } + } + CheckPending = true; + + LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle " + << CurrCycle << '\n'); +} + +/// Move the boundary of scheduled code by one SUnit. +void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) { + bool startNewCycle = false; + + // Update the reservation table. + if (HazardRec->isEnabled()) { + if (!isTop() && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + HazardRec->EmitInstruction(SU); + } + + // Update DFA model. + startNewCycle = ResourceModel->reserveResources(SU, isTop()); + + // Check the instruction group dispatch limit. + // TODO: Check if this SU must end a dispatch group. + IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); + if (startNewCycle) { + LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); + bumpCycle(); + } else + LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle " + << CurrCycle << '\n'); +} + +/// Release pending ready nodes in to the available queue. This makes them +/// visible to heuristics. +void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() { + // If the available queue is empty, it is safe to reset MinReadyCycle. + if (Available.empty()) + MinReadyCycle = std::numeric_limits<unsigned>::max(); + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = Pending.size(); i != e; ++i) { + SUnit *SU = *(Pending.begin() + i); + unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; + + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + if (ReadyCycle > CurrCycle) + continue; + + if (checkHazard(SU)) + continue; + + Available.push(SU); + Pending.remove(Pending.begin() + i); + --i; + --e; + } + CheckPending = false; +} + +/// Remove SU from the ready set for this boundary. 
+void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) { + if (Available.isInQueue(SU)) + Available.remove(Available.find(SU)); + else { + assert(Pending.isInQueue(SU) && "bad ready count"); + Pending.remove(Pending.find(SU)); + } +} + +/// If this queue only has one ready candidate, return it. As a side effect, +/// advance the cycle until at least one node is ready. If multiple instructions +/// are ready, return NULL. +SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() { + if (CheckPending) + releasePending(); + + auto AdvanceCycle = [this]() { + if (Available.empty()) + return true; + if (Available.size() == 1 && Pending.size() > 0) + return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) || + getWeakLeft(*Available.begin(), isTop()) != 0; + return false; + }; + for (unsigned i = 0; AdvanceCycle(); ++i) { + assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && + "permanent hazard"); + (void)i; + ResourceModel->reserveResources(nullptr, isTop()); + bumpCycle(); + releasePending(); + } + if (Available.size() == 1) + return *Available.begin(); + return nullptr; +} + +#ifndef NDEBUG +void ConvergingVLIWScheduler::traceCandidate(const char *Label, + const ReadyQueue &Q, SUnit *SU, + int Cost, PressureChange P) { + dbgs() << Label << " " << Q.getName() << " "; + if (P.isValid()) + dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":" + << P.getUnitInc() << " "; + else + dbgs() << " "; + dbgs() << "cost(" << Cost << ")\t"; + DAG->dumpNode(*SU); +} + +// Very detailed queue dump, to be used with higher verbosity levels. +void ConvergingVLIWScheduler::readyQueueVerboseDump( + const RegPressureTracker &RPTracker, SchedCandidate &Candidate, + ReadyQueue &Q) { + RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker); + + dbgs() << ">>> " << Q.getName() << "\n"; + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + RegPressureDelta RPDelta; + TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + std::stringstream dbgstr; + dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")"; + dbgs() << dbgstr.str(); + SchedulingCost(Q, *I, Candidate, RPDelta, true); + dbgs() << "\t"; + (*I)->getInstr()->dump(); + } + dbgs() << "\n"; +} +#endif + +/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor +/// of SU, return true (we may have duplicates) +static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) { + if (SU->NumPredsLeft == 0) + return false; + + for (auto &Pred : SU->Preds) { + // We found an available, but not scheduled, predecessor. + if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2)) + return false; + } + + return true; +} + +/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor +/// of SU, return true (we may have duplicates) +static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) { + if (SU->NumSuccsLeft == 0) + return false; + + for (auto &Succ : SU->Succs) { + // We found an available, but not scheduled, successor. + if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2)) + return false; + } + return true; +} + +/// Check if the instruction changes the register pressure of a register in the +/// high pressure set. The function returns a negative value if the pressure +/// decreases and a positive value is the pressure increases. 
If the instruction +/// doesn't use a high pressure register or doesn't change the register +/// pressure, then return 0. +int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) { + PressureDiff &PD = DAG->getPressureDiff(SU); + for (auto &P : PD) { + if (!P.isValid()) + continue; + // The pressure differences are computed bottom-up, so the comparision for + // an increase is positive in the bottom direction, but negative in the + // top-down direction. + if (HighPressureSets[P.getPSet()]) + return (isBotUp ? P.getUnitInc() : -P.getUnitInc()); + } + return 0; +} + +/// Single point to compute overall scheduling cost. +/// TODO: More heuristics will be used soon. +int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, + SchedCandidate &Candidate, + RegPressureDelta &Delta, + bool verbose) { + // Initial trivial priority. + int ResCount = 1; + + // Do not waste time on a node that is already scheduled. + if (!SU || SU->isScheduled) + return ResCount; + + LLVM_DEBUG(if (verbose) dbgs() + << ((Q.getID() == TopQID) ? "(top|" : "(bot|")); + // Forced priority is high. + if (SU->isScheduleHigh) { + ResCount += PriorityOne; + LLVM_DEBUG(dbgs() << "H|"); + } + + unsigned IsAvailableAmt = 0; + // Critical path first. + if (Q.getID() == TopQID) { + if (Top.isLatencyBound(SU)) { + LLVM_DEBUG(if (verbose) dbgs() << "LB|"); + ResCount += (SU->getHeight() * ScaleTwo); + } + + LLVM_DEBUG(if (verbose) { + std::stringstream dbgstr; + dbgstr << "h" << std::setw(3) << SU->getHeight() << "|"; + dbgs() << dbgstr.str(); + }); + + // If resources are available for it, multiply the + // chance of scheduling. + if (Top.ResourceModel->isResourceAvailable(SU, true)) { + IsAvailableAmt = (PriorityTwo + PriorityThree); + ResCount += IsAvailableAmt; + LLVM_DEBUG(if (verbose) dbgs() << "A|"); + } else + LLVM_DEBUG(if (verbose) dbgs() << " |"); + } else { + if (Bot.isLatencyBound(SU)) { + LLVM_DEBUG(if (verbose) dbgs() << "LB|"); + ResCount += (SU->getDepth() * ScaleTwo); + } + + LLVM_DEBUG(if (verbose) { + std::stringstream dbgstr; + dbgstr << "d" << std::setw(3) << SU->getDepth() << "|"; + dbgs() << dbgstr.str(); + }); + + // If resources are available for it, multiply the + // chance of scheduling. + if (Bot.ResourceModel->isResourceAvailable(SU, false)) { + IsAvailableAmt = (PriorityTwo + PriorityThree); + ResCount += IsAvailableAmt; + LLVM_DEBUG(if (verbose) dbgs() << "A|"); + } else + LLVM_DEBUG(if (verbose) dbgs() << " |"); + } + + unsigned NumNodesBlocking = 0; + if (Q.getID() == TopQID) { + // How many SUs does it block from scheduling? + // Look at all of the successors of this node. + // Count the number of nodes that + // this node is the sole unscheduled node for. + if (Top.isLatencyBound(SU)) + for (const SDep &SI : SU->Succs) + if (isSingleUnscheduledPred(SI.getSUnit(), SU)) + ++NumNodesBlocking; + } else { + // How many unscheduled predecessors block this node? + if (Bot.isLatencyBound(SU)) + for (const SDep &PI : SU->Preds) + if (isSingleUnscheduledSucc(PI.getSUnit(), SU)) + ++NumNodesBlocking; + } + ResCount += (NumNodesBlocking * ScaleTwo); + + LLVM_DEBUG(if (verbose) { + std::stringstream dbgstr; + dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|"; + dbgs() << dbgstr.str(); + }); + + // Factor in reg pressure as a heuristic. + if (!IgnoreBBRegPressure) { + // Decrease priority by the amount that register pressure exceeds the limit. + ResCount -= (Delta.Excess.getUnitInc() * PriorityOne); + // Decrease priority if register pressure exceeds the limit. 
+ ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne); + // Decrease priority slightly if register pressure would increase over the + // current maximum. + ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo); + // If there are register pressure issues, then we remove the value added for + // the instruction being available. The rationale is that we really don't + // want to schedule an instruction that causes a spill. + if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 && + (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() || + Delta.CurrentMax.getUnitInc())) + ResCount -= IsAvailableAmt; + LLVM_DEBUG(if (verbose) { + dbgs() << "RP " << Delta.Excess.getUnitInc() << "/" + << Delta.CriticalMax.getUnitInc() << "/" + << Delta.CurrentMax.getUnitInc() << ")|"; + }); + } + + // Give preference to a zero latency instruction if the dependent + // instruction is in the current packet. + if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) { + for (const SDep &PI : SU->Preds) { + if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() && + PI.getLatency() == 0 && + Top.ResourceModel->isInPacket(PI.getSUnit())) { + ResCount += PriorityThree; + LLVM_DEBUG(if (verbose) dbgs() << "Z|"); + } + } + } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) { + for (const SDep &SI : SU->Succs) { + if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() && + SI.getLatency() == 0 && + Bot.ResourceModel->isInPacket(SI.getSUnit())) { + ResCount += PriorityThree; + LLVM_DEBUG(if (verbose) dbgs() << "Z|"); + } + } + } + + // If the instruction has a non-zero latency dependence with an instruction in + // the current packet, then it should not be scheduled yet. The case occurs + // when the dependent instruction is scheduled in a new packet, so the + // scheduler updates the current cycle and pending instructions become + // available. + if (CheckEarlyAvail) { + if (Q.getID() == TopQID) { + for (const auto &PI : SU->Preds) { + if (PI.getLatency() > 0 && + Top.ResourceModel->isInPacket(PI.getSUnit())) { + ResCount -= PriorityOne; + LLVM_DEBUG(if (verbose) dbgs() << "D|"); + } + } + } else { + for (const auto &SI : SU->Succs) { + if (SI.getLatency() > 0 && + Bot.ResourceModel->isInPacket(SI.getSUnit())) { + ResCount -= PriorityOne; + LLVM_DEBUG(if (verbose) dbgs() << "D|"); + } + } + } + } + + LLVM_DEBUG(if (verbose) { + std::stringstream dbgstr; + dbgstr << "Total " << std::setw(4) << ResCount << ")"; + dbgs() << dbgstr.str(); + }); + + return ResCount; +} + +/// Pick the best candidate from the top queue. +/// +/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during +/// DAG building. To adjust for the current scheduling location we need to +/// maintain the number of vreg uses remaining to be top-scheduled. +ConvergingVLIWScheduler::CandResult +ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone, + const RegPressureTracker &RPTracker, + SchedCandidate &Candidate) { + ReadyQueue &Q = Zone.Available; + LLVM_DEBUG(if (SchedDebugVerboseLevel > 1) + readyQueueVerboseDump(RPTracker, Candidate, Q); + else Q.dump();); + + // getMaxPressureDelta temporarily modifies the tracker. + RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker); + + // BestSU remains NULL if no top candidates beat the best existing candidate. 
+ CandResult FoundCandidate = NoCand; + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + RegPressureDelta RPDelta; + TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + + int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false); + + // Initialize the candidate if needed. + if (!Candidate.SU) { + LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = NodeOrder; + continue; + } + + // Choose node order for negative cost candidates. There is no good + // candidate in this case. + if (CurrentCost < 0 && Candidate.SCost < 0) { + if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) || + (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { + LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = NodeOrder; + } + continue; + } + + // Best cost. + if (CurrentCost > Candidate.SCost) { + LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + continue; + } + + // Choose an instruction that does not depend on an artificial edge. + unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID)); + unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID)); + if (CurrWeak != CandWeak) { + if (CurrWeak < CandWeak) { + LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = Weak; + } + continue; + } + + if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) { + unsigned CurrSize, CandSize; + if (Q.getID() == TopQID) { + CurrSize = (*I)->Succs.size(); + CandSize = Candidate.SU->Succs.size(); + } else { + CurrSize = (*I)->Preds.size(); + CandSize = Candidate.SU->Preds.size(); + } + if (CurrSize > CandSize) { + LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + } + // Keep the old candidate if it's a better candidate. That is, don't use + // the subsequent tie breaker. + if (CurrSize != CandSize) + continue; + } + + // Tie breaker. + // To avoid scheduling indeterminism, we need a tie breaker + // for the case when cost is identical for two nodes. + if (UseNewerCandidate && CurrentCost == Candidate.SCost) { + if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) || + (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { + LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = NodeOrder; + continue; + } + } + + // Fall through to original instruction order. + // Only consider node order if Candidate was chosen from this Q. + if (FoundCandidate == NoCand) + continue; + } + return FoundCandidate; +} + +/// Pick the best candidate node from either the top or bottom queue. +SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { + // Schedule as far as possible in the direction of no choice. This is most + // efficient, but also provides the best heuristics for CriticalPSets. 
+ if (SUnit *SU = Bot.pickOnlyChoice()) { + LLVM_DEBUG(dbgs() << "Picked only Bottom\n"); + IsTopNode = false; + return SU; + } + if (SUnit *SU = Top.pickOnlyChoice()) { + LLVM_DEBUG(dbgs() << "Picked only Top\n"); + IsTopNode = true; + return SU; + } + SchedCandidate BotCand; + // Prefer bottom scheduling when heuristics are silent. + CandResult BotResult = + pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); + assert(BotResult != NoCand && "failed to find the first candidate"); + + // If either Q has a single candidate that provides the least increase in + // Excess pressure, we can immediately schedule from that Q. + // + // RegionCriticalPSets summarizes the pressure within the scheduled region and + // affects picking from either Q. If scheduling in one direction must + // increase pressure for one of the excess PSets, then schedule in that + // direction first to provide more freedom in the other direction. + if (BotResult == SingleExcess || BotResult == SingleCritical) { + LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n"); + IsTopNode = false; + return BotCand.SU; + } + // Check if the top Q has a better candidate. + SchedCandidate TopCand; + CandResult TopResult = + pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); + assert(TopResult != NoCand && "failed to find the first candidate"); + + if (TopResult == SingleExcess || TopResult == SingleCritical) { + LLVM_DEBUG(dbgs() << "Prefered Top Node\n"); + IsTopNode = true; + return TopCand.SU; + } + // If either Q has a single candidate that minimizes pressure above the + // original region's pressure pick it. + if (BotResult == SingleMax) { + LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n"); + IsTopNode = false; + return BotCand.SU; + } + if (TopResult == SingleMax) { + LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n"); + IsTopNode = true; + return TopCand.SU; + } + if (TopCand.SCost > BotCand.SCost) { + LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n"); + IsTopNode = true; + return TopCand.SU; + } + // Otherwise prefer the bottom candidate in node order. + LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n"); + IsTopNode = false; + return BotCand.SU; +} + +/// Pick the best node to balance the schedule. Implements MachineSchedStrategy. +SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) { + assert(Top.Available.empty() && Top.Pending.empty() && + Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); + return nullptr; + } + SUnit *SU; + if (ForceTopDown) { + SU = Top.pickOnlyChoice(); + if (!SU) { + SchedCandidate TopCand; + CandResult TopResult = + pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); + assert(TopResult != NoCand && "failed to find the first candidate"); + (void)TopResult; + SU = TopCand.SU; + } + IsTopNode = true; + } else if (ForceBottomUp) { + SU = Bot.pickOnlyChoice(); + if (!SU) { + SchedCandidate BotCand; + CandResult BotResult = + pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); + assert(BotResult != NoCand && "failed to find the first candidate"); + (void)BotResult; + SU = BotCand.SU; + } + IsTopNode = false; + } else { + SU = pickNodeBidrectional(IsTopNode); + } + if (SU->isTopReady()) + Top.removeReady(SU); + if (SU->isBottomReady()) + Bot.removeReady(SU); + + LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") + << " Scheduling instruction in cycle " + << (IsTopNode ? 
Top.CurrCycle : Bot.CurrCycle) << " (" + << reportPackets() << ")\n"; + DAG->dumpNode(*SU)); + return SU; +} + +/// Update the scheduler's state after scheduling a node. This is the same node +/// that was just returned by pickNode(). However, VLIWMachineScheduler needs +/// to update it's state based on the current cycle before MachineSchedStrategy +/// does. +void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { + if (IsTopNode) { + Top.bumpNode(SU); + SU->TopReadyCycle = Top.CurrCycle; + } else { + Bot.bumpNode(SU); + SU->BotReadyCycle = Bot.CurrCycle; + } +} diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index 4876b9e23717..0c42bef82005 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -201,9 +201,11 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::x86amx: return Type::getX86_AMXTy(Context); case MVT::i64x8: return IntegerType::get(Context, 512); case MVT::externref: + // pointer to opaque struct in addrspace(10) return PointerType::get(StructType::create(Context), 10); case MVT::funcref: - return PointerType::get(StructType::create(Context), 20); + // pointer to i8 addrspace(20) + return PointerType::get(Type::getInt8Ty(Context), 20); case MVT::v1i1: return FixedVectorType::get(Type::getInt1Ty(Context), 1); case MVT::v2i1: diff --git a/llvm/lib/CodeGen/WinEHPrepare.cpp b/llvm/lib/CodeGen/WinEHPrepare.cpp index 4564aa1c1278..d31183e46d65 100644 --- a/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -573,9 +573,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn, const auto *CatchSwitch = cast<CatchSwitchInst>(Pad); int CatchState = -1, FollowerState = -1; SmallVector<const BasicBlock *, 4> CatchBlocks(CatchSwitch->handlers()); - for (auto CBI = CatchBlocks.rbegin(), CBE = CatchBlocks.rend(); - CBI != CBE; ++CBI, FollowerState = CatchState) { - const BasicBlock *CatchBlock = *CBI; + for (const BasicBlock *CatchBlock : llvm::reverse(CatchBlocks)) { // Create the entry for this catch with the appropriate handler // properties. const auto *Catch = cast<CatchPadInst>(CatchBlock->getFirstNonPHI()); @@ -591,6 +589,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn, Worklist.emplace_back(I, CatchState); // Remember this catch's state. FuncInfo.EHPadStateMap[Catch] = CatchState; + FollowerState = CatchState; } // Associate the catchswitch with the state of its first catch. assert(CatchSwitch->getNumHandlers()); @@ -601,11 +600,9 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn, // Step two: record the TryParentState of each state. For cleanuppads that // don't have cleanuprets, we may need to infer this from their child pads, // so visit pads in descendant-most to ancestor-most order. - for (auto Entry = FuncInfo.ClrEHUnwindMap.rbegin(), - End = FuncInfo.ClrEHUnwindMap.rend(); - Entry != End; ++Entry) { + for (ClrEHUnwindMapEntry &Entry : llvm::reverse(FuncInfo.ClrEHUnwindMap)) { const Instruction *Pad = - Entry->Handler.get<const BasicBlock *>()->getFirstNonPHI(); + Entry.Handler.get<const BasicBlock *>()->getFirstNonPHI(); // For most pads, the TryParentState is the state associated with the // unwind dest of exceptional exits from it. const BasicBlock *UnwindDest; @@ -615,7 +612,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn, // that's not the unwind dest of exceptions escaping the catch. Those // cases were already assigned a TryParentState in the first pass, so // skip them. 
- if (Entry->TryParentState != -1) + if (Entry.TryParentState != -1) continue; // Otherwise, get the unwind dest from the catchswitch. UnwindDest = Catch->getCatchSwitch()->getUnwindDest(); @@ -692,7 +689,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn, UnwindDestState = FuncInfo.EHPadStateMap[UnwindDest->getFirstNonPHI()]; } - Entry->TryParentState = UnwindDestState; + Entry.TryParentState = UnwindDestState; } // Step three: transfer information from pads to invokes. diff --git a/llvm/lib/CodeGen/XRayInstrumentation.cpp b/llvm/lib/CodeGen/XRayInstrumentation.cpp index 11d1b309aa64..b66429d8a5bf 100644 --- a/llvm/lib/CodeGen/XRayInstrumentation.cpp +++ b/llvm/lib/CodeGen/XRayInstrumentation.cpp @@ -226,6 +226,7 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { case Triple::ArchType::arm: case Triple::ArchType::thumb: case Triple::ArchType::aarch64: + case Triple::ArchType::hexagon: case Triple::ArchType::mips: case Triple::ArchType::mipsel: case Triple::ArchType::mips64: |
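
Editor's note (not part of the patch above): several hunks in this section — StackMapLivenessAnalysis, WinEHPrepare, TailDuplicator — replace hand-written iterator loops with range adaptors from llvm/ADT/STLExtras.h. The following is a minimal, self-contained sketch of the llvm::reverse pattern, assuming only a standard LLVM build environment; the container and values are illustrative, not taken from the diff.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::SmallVector<int, 4> Vals = {1, 2, 3, 4};
  // llvm::reverse adapts any bidirectional range so a range-based for walks it
  // back to front, replacing explicit rbegin()/rend() iterator loops.
  for (int V : llvm::reverse(Vals))
    llvm::outs() << V << ' ';
  llvm::outs() << '\n';
  return 0;
}

The TailDuplicator hunk uses llvm::make_early_inc_range instead: its iterator advances the underlying iterator when dereferenced, so the element just visited can be rewritten or erased without invalidating the loop. This note is illustrative commentary only and does not alter the recorded diff.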